From a9322f6488b432ddc1e89be88242c827c633fb63 Mon Sep 17 00:00:00 2001
From: Stefan Assmann <sassmann@suse.de>
Date: Wed, 11 Jun 2008 16:35:14 +0200
Subject: x86, pci: introduce pci=noioapicquirk kernel cmdline option

Introduce pci=noioapicquirk kernel cmdline option to disable all boot
interrupt quirks

Signed-off-by: Stefan Assmann <sassmann@suse.de>
Signed-off-by: Olaf Dabrunz <od@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/pci/common.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 940185ecaeda..bc6a101ed7ec 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -22,6 +22,7 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
 
 static int pci_bf_sort;
 int pci_routeirq;
+int noioapicquirk;
 int pcibios_last_bus = -1;
 unsigned long pirq_table_addr;
 struct pci_bus *pci_root_bus;
@@ -495,6 +496,9 @@ char * __devinit  pcibios_setup(char *str)
 	} else if (!strcmp(str, "skip_isa_align")) {
 		pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
 		return NULL;
+	} else if (!strcmp(str, "noioapicquirk")) {
+		noioapicquirk = 1;
+		return NULL;
 	}
 	return str;
 }
-- 
cgit v1.2.3-70-g09d2


From 9197979b518573999d52d9e85bce1680682ed85c Mon Sep 17 00:00:00 2001
From: Stefan Assmann <sassmann@suse.de>
Date: Wed, 11 Jun 2008 16:35:15 +0200
Subject: x86, pci: introduce pci=ioapicreroute kernel cmdline option

Introduce pci=ioapicreroute kernel cmdline option to enable rerouting of boot
interrupts to the primary io-apic.

Signed-off-by: Stefan Assmann <sassmann@suse.de>
Signed-off-by: Olaf Dabrunz <od@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/kernel-parameters.txt | 4 ++++
 arch/x86/pci/common.c               | 5 +++++
 include/asm-x86/io_apic.h           | 4 ++++
 include/asm-x86/pci.h               | 1 +
 4 files changed, 14 insertions(+)

(limited to 'arch/x86')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1aebe9dffbaa..df262b3c3d6e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1521,6 +1521,10 @@ and is between 256 and 4096 characters. It is defined in the file
 		noioapicquirk	[APIC] Disable all boot interrupt quirks.
 				Safety option to keep boot IRQs enabled. This
 				should never be necessary.
+		ioapicreroute	[APIC] Enable rerouting of boot IRQs to the
+				primary IO-APIC for bridges that cannot disable
+				boot IRQs. This fixes a source of spurious IRQs
+				when the system masks IRQs.
 		biosirq		[X86-32] Use PCI BIOS calls to get the interrupt
 				routing table. These calls are known to be buggy
 				on several machines and they hang the machine
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index bc6a101ed7ec..0a9eaa736d94 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -23,6 +23,7 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
 static int pci_bf_sort;
 int pci_routeirq;
 int noioapicquirk;
+int noioapicreroute = 1;
 int pcibios_last_bus = -1;
 unsigned long pirq_table_addr;
 struct pci_bus *pci_root_bus;
@@ -499,6 +500,10 @@ char * __devinit  pcibios_setup(char *str)
 	} else if (!strcmp(str, "noioapicquirk")) {
 		noioapicquirk = 1;
 		return NULL;
+	} else if (!strcmp(str, "ioapicreroute")) {
+		if (noioapicreroute != -1)
+			noioapicreroute = 0;
+		return NULL;
 	}
 	return str;
 }
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index 8ca0110819f4..a39670ae17df 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -160,12 +160,16 @@ extern int skip_ioapic_setup;
 /* 1 if "noapic" boot option passed */
 extern int noioapicquirk;
 
+/* -1 if "noapic" boot option passed */
+extern int noioapicreroute;
+
 /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */
 extern int timer_through_8259;
 
 static inline void disable_ioapic_setup(void)
 {
 	noioapicquirk = 1;
+	noioapicreroute = -1;
 	skip_ioapic_setup = 1;
 }
 
diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h
index 30eec93a845e..52a29f7668ef 100644
--- a/include/asm-x86/pci.h
+++ b/include/asm-x86/pci.h
@@ -20,6 +20,7 @@ struct pci_sysdata {
 
 extern int pci_routeirq;
 extern int noioapicquirk;
+extern int ioapicreroute;
 
 /* scan a bus after allocating a pci_sysdata for it */
 extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
-- 
cgit v1.2.3-70-g09d2


From 41b9eb264c8407655db57b60b4457fe1b2ec9977 Mon Sep 17 00:00:00 2001
From: Stefan Assmann <sassmann@suse.de>
Date: Tue, 15 Jul 2008 13:48:55 +0200
Subject: x86, pci: introduce config option for pci reroute quirks (was: [PATCH
 0/3] Boot IRQ quirks for Broadcom and AMD/ATI)

This is against linux-2.6-tip, branch pci-ioapic-boot-irq-quirks.

From: Stefan Assmann <sassmann@suse.de>
Subject: Introduce config option for pci reroute quirks

The config option X86_REROUTE_FOR_BROKEN_BOOT_IRQS is introduced to
enable (or disable) the redirection of the interrupt handler to the boot
interrupt line by default. Depending on the existence of interrupt
masking / threaded interrupt handling in the kernel (vanilla, rt, ...)
and the maturity of the rerouting patch, users can enable or disable the
redirection by default.

This means that the reroute quirk can be applied to any kernel without
changing it.

Interrupt sharing could be increased if this option is enabled. However this
option is vital for threaded interrupt handling, as done by the RT kernel.
It should simplify the consolidation with the RT kernel.

The option can be overridden by either pci=ioapicreroute or
pci=noioapicreroute.

Signed-off-by: Stefan Assmann <sassmann@suse.de>
Signed-off-by: Olaf Dabrunz <od@suse.de>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Jon Masters <jonathan@jonmasters.org>
Cc: Ihno Krumreich <ihno@suse.de>
Cc: Sven Dietrich <sdietrich@suse.de>
Cc: Daniel Gollub <dgollub@suse.de>
Cc: Felix Foerster <ffoerster@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/kernel-parameters.txt |  4 ++++
 arch/x86/Kconfig                    | 24 ++++++++++++++++++++++++
 arch/x86/pci/common.c               |  8 ++++++++
 drivers/pci/quirks.c                |  2 +-
 include/asm-x86/pci.h               |  2 +-
 5 files changed, 38 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index f5662b7a34d1..62b6e8067a5b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1536,6 +1536,10 @@ and is between 256 and 4096 characters. It is defined in the file
 				primary IO-APIC for bridges that cannot disable
 				boot IRQs. This fixes a source of spurious IRQs
 				when the system masks IRQs.
+		noioapicreroute	[APIC] Disable workaround that uses the
+				boot IRQ equivalent of an IRQ that connects to
+				a chipset where boot IRQs cannot be disabled.
+				The opposite of ioapicreroute.
 		biosirq		[X86-32] Use PCI BIOS calls to get the interrupt
 				routing table. These calls are known to be buggy
 				on several machines and they hang the machine
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 96e0c2ebc388..09521332636b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -665,6 +665,30 @@ config X86_VISWS_APIC
 	def_bool y
 	depends on X86_32 && X86_VISWS
 
+config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
+	bool "Reroute for broken boot IRQs"
+	default n
+	depends on X86_IO_APIC
+	help
+	  This option enables a workaround that fixes a source of
+	  spurious interrupts. This is recommended when threaded
+	  interrupt handling is used on systems where the generation of
+	  superfluous "boot interrupts" cannot be disabled.
+
+	  Some chipsets generate a legacy INTx "boot IRQ" when the IRQ
+	  entry in the chipset's IO-APIC is masked (as, e.g. the RT
+	  kernel does during interrupt handling). On chipsets where this
+	  boot IRQ generation cannot be disabled, this workaround keeps
+	  the original IRQ line masked so that only the equivalent "boot
+	  IRQ" is delivered to the CPUs. The workaround also tells the
+	  kernel to set up the IRQ handler on the boot IRQ line. In this
+	  way only one interrupt is delivered to the kernel. Otherwise
+	  the spurious second interrupt may cause the kernel to bring
+	  down (vital) interrupt lines.
+
+	  Only affects "broken" chipsets. Interrupt sharing may be
+	  increased on these systems.
+
 config X86_MCE
 	bool "Machine Check Exception"
 	depends on !X86_VOYAGER
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 1485a26ddcef..bb1a01f089e2 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -24,7 +24,11 @@ unsigned int pci_early_dump_regs;
 static int pci_bf_sort;
 int pci_routeirq;
 int noioapicquirk;
+#ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS
+int noioapicreroute = 0;
+#else
 int noioapicreroute = 1;
+#endif
 int pcibios_last_bus = -1;
 unsigned long pirq_table_addr;
 struct pci_bus *pci_root_bus;
@@ -528,6 +532,10 @@ char * __devinit  pcibios_setup(char *str)
 		if (noioapicreroute != -1)
 			noioapicreroute = 0;
 		return NULL;
+	} else if (!strcmp(str, "noioapicreroute")) {
+		if (noioapicreroute != -1)
+			noioapicreroute = 1;
+		return NULL;
 	}
 	return str;
 }
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 0911b0c60b64..c880dd0bbfb5 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -1397,7 +1397,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x260b, quirk_intel_pcie_pm);
  */
 static void quirk_reroute_to_boot_interrupts_intel(struct pci_dev *dev)
 {
-	if (noioapicquirk)
+	if (noioapicquirk || noioapicreroute)
 		return;
 
 	dev->irq_reroute_variant = INTEL_IRQ_REROUTE_VARIANT;
diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h
index 52a29f7668ef..9584d6d5eb93 100644
--- a/include/asm-x86/pci.h
+++ b/include/asm-x86/pci.h
@@ -20,7 +20,7 @@ struct pci_sysdata {
 
 extern int pci_routeirq;
 extern int noioapicquirk;
-extern int ioapicreroute;
+extern int noioapicreroute;
 
 /* scan a bus after allocating a pci_sysdata for it */
 extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
-- 
cgit v1.2.3-70-g09d2


From b0f209898f1a177bd503d49215b8c6628797a81c Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@sgi.com>
Date: Tue, 21 Oct 2008 14:09:51 -0500
Subject: x86, uv: use consistent names for region size and conherence id on
 x86 and ia64

Use consistent names for region size and conherence id on x86 and ia64.

The SGI xp drivers are used on both ia64 and x86.  Using the same
names (sn_coherency_id, sn_region_size) simplies the driver code.

Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/bios_uv.c        | 8 ++++----
 arch/x86/kernel/genx2apic_uv_x.c | 4 ++--
 include/asm-x86/uv/bios.h        | 6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index f0dfe6f17e7e..7cefb7170e75 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -69,10 +69,10 @@ s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
 
 long sn_partition_id;
 EXPORT_SYMBOL_GPL(sn_partition_id);
-long uv_coherency_id;
-EXPORT_SYMBOL_GPL(uv_coherency_id);
-long uv_region_size;
-EXPORT_SYMBOL_GPL(uv_region_size);
+long sn_coherency_id;
+EXPORT_SYMBOL_GPL(sn_coherency_id);
+long sn_region_size;
+EXPORT_SYMBOL_GPL(sn_region_size);
 int uv_type;
 
 
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index bfd532843df6..6cf35c8bd636 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -429,7 +429,7 @@ void __init uv_system_init(void)
 
 	uv_bios_init();
 	uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
-			    &uv_coherency_id, &uv_region_size);
+			    &sn_coherency_id, &sn_region_size);
 	uv_rtc_init();
 
 	for_each_present_cpu(cpu) {
@@ -451,7 +451,7 @@ void __init uv_system_init(void)
 		uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
 		uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
 		uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
-		uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id;
+		uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
 		uv_node_to_blade[nid] = blade;
 		uv_cpu_to_blade[cpu] = blade;
 		max_pnode = max(pnode, max_pnode);
diff --git a/include/asm-x86/uv/bios.h b/include/asm-x86/uv/bios.h
index 215f1969c266..7b3d7022c639 100644
--- a/include/asm-x86/uv/bios.h
+++ b/include/asm-x86/uv/bios.h
@@ -85,9 +85,9 @@ extern void uv_bios_init(void);
 
 extern int uv_type;
 extern long sn_partition_id;
-extern long uv_coherency_id;
-extern long uv_region_size;
-#define partition_coherence_id()	(uv_coherency_id)
+extern long sn_coherency_id;
+extern long sn_region_size;
+#define partition_coherence_id()	(sn_coherency_id)
 
 extern struct kobject *sgi_uv_kobj;	/* /sys/firmware/sgi_uv */
 
-- 
cgit v1.2.3-70-g09d2


From 9e899816d126cc6f7d405c349f65363214fe7399 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 22 Oct 2008 12:33:16 +0200
Subject: x86, mm: enable GBPAGES option by default

DIRECT_GBPAGES was under DEBUG_KERNEL && EXPERIMENTAL and disabled by default.
Turn it on by default and put it under EMBEDDED.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig       |  9 +++++++++
 arch/x86/Kconfig.debug | 12 ------------
 2 files changed, 9 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5b9b12321ad1..c00aefcb47d5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -946,6 +946,15 @@ config X86_PAE
 config ARCH_PHYS_ADDR_T_64BIT
        def_bool X86_64 || X86_PAE
 
+config DIRECT_GBPAGES
+	bool "Enable 1GB pages for kernel pagetables" if EMBEDDED
+	default y
+	depends on X86_64
+	help
+	  Allow the kernel linear mapping to use 1GB pages on CPUs that
+	  support it. This can improve the kernel's performance a tiny bit by
+	  reducing TLB pressure. If in doubt, say "Y".
+
 # Common NUMA Features
 config NUMA
 	bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2a3dfbd5e677..567fe543e09c 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -114,18 +114,6 @@ config DEBUG_RODATA
 	  data. This is recommended so that we can catch kernel bugs sooner.
 	  If in doubt, say "Y".
 
-config DIRECT_GBPAGES
-	bool "Enable gbpages-mapped kernel pagetables"
-	depends on DEBUG_KERNEL && EXPERIMENTAL && X86_64
-	help
-	  Enable gigabyte pages support (if the CPU supports it). This can
-	  improve the kernel's performance a tiny bit by reducing TLB
-	  pressure.
-
-	  This is experimental code.
-
-	  If in doubt, say "N".
-
 config DEBUG_RODATA_TEST
 	bool "Testcase for the DEBUG_RODATA feature"
 	depends on DEBUG_RODATA
-- 
cgit v1.2.3-70-g09d2


From b4b8f87bf4958cbad620654efc0882ac46c19846 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@fastmail.fm>
Date: Wed, 22 Oct 2008 12:00:08 +0200
Subject: i386, dumpstack: move crash_kexec before bust_spinlocks(0) in
 oops_end

crash_kexec should not be called with console_sem held. Move
the call before bust_spinlocks(0) in oops_end to avoid the
problem.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Acked-by: "Neil Horman" <nhorman@tuxdriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/dumpstack_32.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index b3614752197b..5493d31be4e5 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -309,6 +309,9 @@ unsigned __kprobes long oops_begin(void)
 
 void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 {
+	if (regs && kexec_should_crash(current))
+		crash_kexec(regs);
+
 	bust_spinlocks(0);
 	die_owner = -1;
 	add_taint(TAINT_DIE);
@@ -318,8 +321,6 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 	if (!regs)
 		return;
 
-	if (kexec_should_crash(current))
-		crash_kexec(regs);
 	if (in_interrupt())
 		panic("Fatal exception in interrupt");
 	if (panic_on_oops)
-- 
cgit v1.2.3-70-g09d2


From 874d93d11823b2b861addac6a5dc31162e924ab2 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@fastmail.fm>
Date: Wed, 22 Oct 2008 12:00:09 +0200
Subject: x86, dumpstack: let signr=0 signal no do_exit

Change oops_end such that signr=0 signals that do_exit
is not to be called.

Currently, each use of __die is soon followed by a call
to oops_end and 'regs' is set to NULL if oops_end is expected
not to call do_exit. Change all such pairs to set signr=0
instead. On x86_64 oops_end is used 'bare' in die_nmi; use
signr=0 instead of regs=NULL there, too.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/dumpstack_32.c |  7 ++++---
 arch/x86/kernel/dumpstack_64.c |  9 +++++----
 arch/x86/mm/fault.c            | 11 +++++++----
 3 files changed, 16 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 5493d31be4e5..7c22f99f0efb 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -318,7 +318,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 	__raw_spin_unlock(&die_lock);
 	raw_local_irq_restore(flags);
 
-	if (!regs)
+	if (!signr)
 		return;
 
 	if (in_interrupt())
@@ -371,17 +371,18 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 void die(const char *str, struct pt_regs *regs, long err)
 {
 	unsigned long flags = oops_begin();
+	int sig = SIGSEGV;
 
 	if (die_nest_count < 3) {
 		report_bug(regs->ip, regs);
 
 		if (__die(str, regs, err))
-			regs = NULL;
+			sig = 0;
 	} else {
 		printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
 	}
 
-	oops_end(flags, regs, SIGSEGV);
+	oops_end(flags, regs, sig);
 }
 
 static DEFINE_SPINLOCK(nmi_print_lock);
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 96a5db7da8a7..ffefea611ba3 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -465,7 +465,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 		/* Nest count reaches zero, release the lock. */
 		__raw_spin_unlock(&die_lock);
 	raw_local_irq_restore(flags);
-	if (!regs) {
+	if (!signr) {
 		oops_exit();
 		return;
 	}
@@ -509,13 +509,14 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 void die(const char *str, struct pt_regs *regs, long err)
 {
 	unsigned long flags = oops_begin();
+	int sig = SIGSEGV;
 
 	if (!user_mode(regs))
 		report_bug(regs->ip, regs);
 
 	if (__die(str, regs, err))
-		regs = NULL;
-	oops_end(flags, regs, SIGSEGV);
+		sig = 0;
+	oops_end(flags, regs, sig);
 }
 
 notrace __kprobes void
@@ -539,7 +540,7 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
 		crash_kexec(regs);
 	if (do_panic || panic_on_oops)
 		panic("Non maskable interrupt");
-	oops_end(flags, NULL, SIGBUS);
+	oops_end(flags, regs, 0);
 	nmi_exit();
 	local_irq_enable();
 	do_exit(SIGBUS);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 31e8730fa246..20ef272c412c 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -413,6 +413,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 				 unsigned long error_code)
 {
 	unsigned long flags = oops_begin();
+	int sig = SIGKILL;
 	struct task_struct *tsk;
 
 	printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
@@ -423,8 +424,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 	tsk->thread.trap_no = 14;
 	tsk->thread.error_code = error_code;
 	if (__die("Bad pagetable", regs, error_code))
-		regs = NULL;
-	oops_end(flags, regs, SIGKILL);
+		sig = 0;
+	oops_end(flags, regs, sig);
 }
 #endif
 
@@ -590,6 +591,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	int fault;
 #ifdef CONFIG_X86_64
 	unsigned long flags;
+	int sig;
 #endif
 
 	tsk = current;
@@ -849,11 +851,12 @@ no_context:
 	bust_spinlocks(0);
 	do_exit(SIGKILL);
 #else
+	sig = SIGKILL;
 	if (__die("Oops", regs, error_code))
-		regs = NULL;
+		sig = 0;
 	/* Executive summary in case the body of the oops scrolled away */
 	printk(KERN_EMERG "CR2: %016lx\n", address);
-	oops_end(flags, regs, SIGKILL);
+	oops_end(flags, regs, sig);
 #endif
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 0ed7a498f416dcfa1cca478a559238a2a3396240 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@fastmail.fm>
Date: Wed, 22 Oct 2008 12:00:10 +0200
Subject: x86_64, dumpstack: move kexec_crash from __die to oops_end

oops_end is preceded by either a call to __die, or a conditional
call to crash_kexec. Move the conditional call to crash_kexec
from the end of __die to the start of oops_end and remove
the superfluous call to crash_kexec in die_nmi.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/dumpstack_64.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index ffefea611ba3..57ce11b895ce 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -458,6 +458,9 @@ unsigned __kprobes long oops_begin(void)
 
 void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 {
+	if (regs && kexec_should_crash(current))
+		crash_kexec(regs);
+
 	die_owner = -1;
 	bust_spinlocks(0);
 	die_nest_count--;
@@ -501,8 +504,6 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 	printk(KERN_ALERT "RIP ");
 	printk_address(regs->ip, 1);
 	printk(" RSP <%016lx>\n", regs->sp);
-	if (kexec_should_crash(current))
-		crash_kexec(regs);
 	return 0;
 }
 
@@ -536,11 +537,9 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
 	printk(" on CPU%d, ip %08lx, registers:\n",
 		smp_processor_id(), regs->ip);
 	show_registers(regs);
-	if (kexec_should_crash(current))
-		crash_kexec(regs);
+	oops_end(flags, regs, 0);
 	if (do_panic || panic_on_oops)
 		panic("Non maskable interrupt");
-	oops_end(flags, regs, 0);
 	nmi_exit();
 	local_irq_enable();
 	do_exit(SIGBUS);
-- 
cgit v1.2.3-70-g09d2


From 10b14cb7eb7dd5bff8023f76a55c8ac20e586128 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@fastmail.fm>
Date: Wed, 22 Oct 2008 12:00:11 +0200
Subject: x86, dumpstack: always call oops_exit from oops_end

Always call oops_exit from oops_end, even if signr==0.

Also, move add_taint(TAINT_DIE) from __die to oops_end
on x86_64 and interchange two lines to make oops_end
more similar to the i386-version.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/dumpstack_32.c |  2 +-
 arch/x86/kernel/dumpstack_64.c | 11 +++++------
 2 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 7c22f99f0efb..a29b88ffa346 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -318,6 +318,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 	__raw_spin_unlock(&die_lock);
 	raw_local_irq_restore(flags);
 
+	oops_exit();
 	if (!signr)
 		return;
 
@@ -325,7 +326,6 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 		panic("Fatal exception in interrupt");
 	if (panic_on_oops)
 		panic("Fatal exception");
-	oops_exit();
 	do_exit(signr);
 }
 
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 57ce11b895ce..dc6162bf7454 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -461,22 +461,22 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 	if (regs && kexec_should_crash(current))
 		crash_kexec(regs);
 
-	die_owner = -1;
 	bust_spinlocks(0);
+	die_owner = -1;
+	add_taint(TAINT_DIE);
 	die_nest_count--;
 	if (!die_nest_count)
 		/* Nest count reaches zero, release the lock. */
 		__raw_spin_unlock(&die_lock);
 	raw_local_irq_restore(flags);
-	if (!signr) {
-		oops_exit();
+	oops_exit();
+
+	if (!signr)
 		return;
-	}
 	if (in_interrupt())
 		panic("Fatal exception in interrupt");
 	if (panic_on_oops)
 		panic("Fatal exception");
-	oops_exit();
 	do_exit(signr);
 }
 
@@ -499,7 +499,6 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 		return 1;
 
 	show_registers(regs);
-	add_taint(TAINT_DIE);
 	/* Executive summary in case the oops scrolled away */
 	printk(KERN_ALERT "RIP ");
 	printk_address(regs->ip, 1);
-- 
cgit v1.2.3-70-g09d2


From e4955cfd2f5c81eb708f55769aa60173f207fd63 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@fastmail.fm>
Date: Wed, 22 Oct 2008 12:00:12 +0200
Subject: i386, dumpstack: use x86_64's method to account die_nest_count

oops_begin/oops_end should always be used in pairs. On x86_64
oops_begin increments die_nest_count, and oops_end decrements
die_nest_count. Doing this makes oops_begin and oops_end equal
to the x86_64 versions.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/dumpstack_32.c | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index a29b88ffa346..7c7d691b32be 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -289,21 +289,24 @@ static unsigned int die_nest_count;
 
 unsigned __kprobes long oops_begin(void)
 {
+	int cpu;
 	unsigned long flags;
 
 	oops_enter();
 
-	if (die_owner != raw_smp_processor_id()) {
-		console_verbose();
-		raw_local_irq_save(flags);
-		__raw_spin_lock(&die_lock);
-		die_owner = smp_processor_id();
-		die_nest_count = 0;
-		bust_spinlocks(1);
-	} else {
-		raw_local_irq_save(flags);
+	/* racy, but better than risking deadlock. */
+	raw_local_irq_save(flags);
+	cpu = smp_processor_id();
+	if (!__raw_spin_trylock(&die_lock)) {
+		if (cpu == die_owner)
+			/* nested oops. should stop eventually */;
+		else
+			__raw_spin_lock(&die_lock);
 	}
 	die_nest_count++;
+	die_owner = cpu;
+	console_verbose();
+	bust_spinlocks(1);
 	return flags;
 }
 
@@ -315,13 +318,15 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 	bust_spinlocks(0);
 	die_owner = -1;
 	add_taint(TAINT_DIE);
-	__raw_spin_unlock(&die_lock);
+	die_nest_count--;
+	if (!die_nest_count)
+		/* Nest count reaches zero, release the lock. */
+		__raw_spin_unlock(&die_lock);
 	raw_local_irq_restore(flags);
-
 	oops_exit();
+
 	if (!signr)
 		return;
-
 	if (in_interrupt())
 		panic("Fatal exception in interrupt");
 	if (panic_on_oops)
-- 
cgit v1.2.3-70-g09d2


From e06ca430c3d0fddbd1c901ab3fb3e1f0bc8a786b Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@fastmail.fm>
Date: Wed, 22 Oct 2008 12:00:13 +0200
Subject: i386, dumpstack: use oops_begin/oops_end in die_nmi

Use oops_begin and oops_end in die_nmi.

Whitespace-only changes on x86_64, to make it equal to i386's
version.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/dumpstack_32.c | 33 +++++++++++----------------------
 arch/x86/kernel/dumpstack_64.c |  4 ++--
 2 files changed, 13 insertions(+), 24 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 7c7d691b32be..e91ae34f9684 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -390,40 +390,29 @@ void die(const char *str, struct pt_regs *regs, long err)
 	oops_end(flags, regs, sig);
 }
 
-static DEFINE_SPINLOCK(nmi_print_lock);
-
 void notrace __kprobes
 die_nmi(char *str, struct pt_regs *regs, int do_panic)
 {
+	unsigned long flags;
+
 	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
 		return;
 
-	spin_lock(&nmi_print_lock);
 	/*
-	* We are in trouble anyway, lets at least try
-	* to get a message out:
-	*/
-	bust_spinlocks(1);
+	 * We are in trouble anyway, lets at least try
+	 * to get a message out.
+	 */
+	flags = oops_begin();
 	printk(KERN_EMERG "%s", str);
 	printk(" on CPU%d, ip %08lx, registers:\n",
 		smp_processor_id(), regs->ip);
 	show_registers(regs);
-	if (do_panic)
+	oops_end(flags, regs, 0);
+	if (do_panic || panic_on_oops)
 		panic("Non maskable interrupt");
-	console_silent();
-	spin_unlock(&nmi_print_lock);
-
-	/*
-	 * If we are in kernel we are probably nested up pretty bad
-	 * and might aswell get out now while we still can:
-	 */
-	if (!user_mode_vm(regs)) {
-		current->thread.trap_no = 2;
-		crash_kexec(regs);
-	}
-
-	bust_spinlocks(0);
-	do_exit(SIGSEGV);
+	nmi_exit();
+	local_irq_enable();
+	do_exit(SIGBUS);
 }
 
 static int __init oops_setup(char *s)
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index dc6162bf7454..831e1e159cb4 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -519,7 +519,7 @@ void die(const char *str, struct pt_regs *regs, long err)
 	oops_end(flags, regs, sig);
 }
 
-notrace __kprobes void
+void notrace __kprobes
 die_nmi(char *str, struct pt_regs *regs, int do_panic)
 {
 	unsigned long flags;
@@ -527,11 +527,11 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
 	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
 		return;
 
-	flags = oops_begin();
 	/*
 	 * We are in trouble anyway, lets at least try
 	 * to get a message out.
 	 */
+	flags = oops_begin();
 	printk(KERN_EMERG "%s", str);
 	printk(" on CPU%d, ip %08lx, registers:\n",
 		smp_processor_id(), regs->ip);
-- 
cgit v1.2.3-70-g09d2


From 871d3779cba18b028e34d0d2f6cc6caae76a97b6 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@fastmail.fm>
Date: Wed, 22 Oct 2008 12:00:14 +0200
Subject: i386, dumpstack: unify die()

Make i386's die() equal to x86_64's version.

Whitespace-only changes on x86_64, to make it equal to i386's
version. (user_mode and user_mode_vm are equal on x86_64.)

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/dumpstack_32.c | 10 +++-------
 arch/x86/kernel/dumpstack_64.c |  6 +++++-
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index e91ae34f9684..f2046c5752d0 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -378,15 +378,11 @@ void die(const char *str, struct pt_regs *regs, long err)
 	unsigned long flags = oops_begin();
 	int sig = SIGSEGV;
 
-	if (die_nest_count < 3) {
+	if (!user_mode_vm(regs))
 		report_bug(regs->ip, regs);
 
-		if (__die(str, regs, err))
-			sig = 0;
-	} else {
-		printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
-	}
-
+	if (__die(str, regs, err))
+		sig = 0;
 	oops_end(flags, regs, sig);
 }
 
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 831e1e159cb4..28c67aae5562 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -506,12 +506,16 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 	return 0;
 }
 
+/*
+ * This is gone through when something in the kernel has done something bad
+ * and is about to be terminated:
+ */
 void die(const char *str, struct pt_regs *regs, long err)
 {
 	unsigned long flags = oops_begin();
 	int sig = SIGSEGV;
 
-	if (!user_mode(regs))
+	if (!user_mode_vm(regs))
 		report_bug(regs->ip, regs);
 
 	if (__die(str, regs, err))
-- 
cgit v1.2.3-70-g09d2


From 63fb70859f987f2b3b8028fa467fd63336315e9c Mon Sep 17 00:00:00 2001
From: Sitsofe Wheeler <sitsofe@yahoo.com>
Date: Sat, 11 Oct 2008 20:27:53 +0100
Subject: x86: change OPTIMIZE_INLINING help to say enabling makes smaller
 kernels

Impact: clarify Kconfig help text

The OPTIMIZE_INLINING help currently says "The gcc 4.x series have a
rewritten inlining algorithm and disabling this option will generate a
smaller kernel there."

This contradicts other parts of the help text and my own tests:

  5463127 2008-10-11 19:51 vmlinux.no-opt
  5456152 2008-10-11 19:56 vmlinux.opt

Reword text to say that enabling OPTIMIZE_INLINING will lead to smaller
kernels with gcc 4.x or later.

Signed-off-by: Sitsofe Wheeler <sitsofe@yahoo.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig.debug | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2a3dfbd5e677..2be1e6b8e18b 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -307,10 +307,10 @@ config OPTIMIZE_INLINING
 	  developers have marked 'inline'. Doing so takes away freedom from gcc to
 	  do what it thinks is best, which is desirable for the gcc 3.x series of
 	  compilers. The gcc 4.x series have a rewritten inlining algorithm and
-	  disabling this option will generate a smaller kernel there. Hopefully
-	  this algorithm is so good that allowing gcc4 to make the decision can
-	  become the default in the future, until then this option is there to
-	  test gcc for this.
+	  enabling this option will generate a smaller kernel there. Hopefully
+	  this algorithm is so good that allowing gcc 4.x and above to make the
+	  decision will become the default in the future. Until then this option
+	  is there to test gcc for this.
 
 	  If unsure, say N.
 
-- 
cgit v1.2.3-70-g09d2


From 8479d94e9f6a44b5050cbacf653272a561fbe0d0 Mon Sep 17 00:00:00 2001
From: Mikael Pettersson <mikpe@it.uu.se>
Date: Mon, 27 Oct 2008 09:30:57 +0100
Subject: x86, signals: remove duplicated register setup code in ia32 signal
 delivery

Impact: cleanup, no functionality changed

ia32_setup_rt_frame() has a duplicated code block labelled
"Make -mregparm=3 work" for setting up the register parameters
to the user-mode signal handler.

This is harmless but ugly. Remove the redundant assignments.

Signed-off-by: Mikael Pettersson <mikpe@it.uu.se>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 4bc02b23674b..e82ebd652263 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -572,11 +572,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	regs->dx = (unsigned long) &frame->info;
 	regs->cx = (unsigned long) &frame->uc;
 
-	/* Make -mregparm=3 work */
-	regs->ax = sig;
-	regs->dx = (unsigned long) &frame->info;
-	regs->cx = (unsigned long) &frame->uc;
-
 	loadsegment(ds, __USER32_DS);
 	loadsegment(es, __USER32_DS);
 
-- 
cgit v1.2.3-70-g09d2


From 7f1baa063e2582dd52d83bb31508e9e84468c666 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Fri, 24 Oct 2008 15:24:29 -0700
Subject: x86/uv: provide a System Activity Indicator driver

Impact: start per CPU heartbeat LED timers on SGI UV systems

The SGI UV system has no LEDS but uses one of the system controller
regs to indicate the online internal state of the cpu.  There is a
heartbeat bit indicating that the cpu is responding to interrupts,
and an idle bit indicating whether the cpu is idle when the heartbeat
interrupt occurs.  The current period is one second.

When a cpu panics, an error code is written by BIOS to this same reg.

This patchset provides the following:

  * x86_64: Add base functionality for writing to the specific SCIR's
    for each cpu.

  * heartbeat: Invert "heartbeat" bit to indicate the cpu is
    "interruptible".  If the current thread is the idle thread,
    then indicate system is "idle".

  * if hotplug enabled, all bits are set (0xff) when the cpu is disabled.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/uv/uv_hub.h |  63 +++++++++++++++++++++++-
 arch/x86/kernel/genx2apic_uv_x.c | 102 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 164 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index c6ad93e315c8..400776dba9b5 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -112,6 +112,16 @@
  */
 #define UV_MAX_NASID_VALUE	(UV_MAX_NUMALINK_NODES * 2)
 
+struct uv_scir_s {
+	struct timer_list timer;
+	unsigned long	offset;
+	unsigned long	last;
+	unsigned long	idle_on;
+	unsigned long	idle_off;
+	unsigned char	state;
+	unsigned char	enabled;
+};
+
 /*
  * The following defines attributes of the HUB chip. These attributes are
  * frequently referenced and are kept in the per-cpu data areas of each cpu.
@@ -130,7 +140,9 @@ struct uv_hub_info_s {
 	unsigned char	blade_processor_id;
 	unsigned char	m_val;
 	unsigned char	n_val;
+	struct uv_scir_s scir;
 };
+
 DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
 #define uv_hub_info 		(&__get_cpu_var(__uv_hub_info))
 #define uv_cpu_hub_info(cpu)	(&per_cpu(__uv_hub_info, cpu))
@@ -162,6 +174,30 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
 
 #define UV_APIC_PNODE_SHIFT	6
 
+/* Local Bus from cpu's perspective */
+#define LOCAL_BUS_BASE		0x1c00000
+#define LOCAL_BUS_SIZE		(4 * 1024 * 1024)
+
+/*
+ * System Controller Interface Reg
+ *
+ * Note there are NO leds on a UV system.  This register is only
+ * used by the system controller to monitor system-wide operation.
+ * There are 64 regs per node.  With Nahelem cpus (2 cores per node,
+ * 8 cpus per core, 2 threads per cpu) there are 32 cpu threads on
+ * a node.
+ *
+ * The window is located at top of ACPI MMR space
+ */
+#define SCIR_WINDOW_COUNT	64
+#define SCIR_LOCAL_MMR_BASE	(LOCAL_BUS_BASE + \
+				 LOCAL_BUS_SIZE - \
+				 SCIR_WINDOW_COUNT)
+
+#define SCIR_CPU_HEARTBEAT	0x01	/* timer interrupt */
+#define SCIR_CPU_ACTIVITY	0x02	/* not idle */
+#define SCIR_CPU_HB_INTERVAL	(HZ)	/* once per second */
+
 /*
  * Macros for converting between kernel virtual addresses, socket local physical
  * addresses, and UV global physical addresses.
@@ -276,6 +312,16 @@ static inline void uv_write_local_mmr(unsigned long offset, unsigned long val)
 	*uv_local_mmr_address(offset) = val;
 }
 
+static inline unsigned char uv_read_local_mmr8(unsigned long offset)
+{
+	return *((unsigned char *)uv_local_mmr_address(offset));
+}
+
+static inline void uv_write_local_mmr8(unsigned long offset, unsigned char val)
+{
+	*((unsigned char *)uv_local_mmr_address(offset)) = val;
+}
+
 /*
  * Structures and definitions for converting between cpu, node, pnode, and blade
  * numbers.
@@ -350,5 +396,20 @@ static inline int uv_num_possible_blades(void)
 	return uv_possible_blades;
 }
 
-#endif /* _ASM_X86_UV_UV_HUB_H */
+/* Update SCIR state */
+static inline void uv_set_scir_bits(unsigned char value)
+{
+	if (uv_hub_info->scir.state != value) {
+		uv_hub_info->scir.state = value;
+		uv_write_local_mmr8(uv_hub_info->scir.offset, value);
+	}
+}
+static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
+{
+	if (uv_cpu_hub_info(cpu)->scir.state != value) {
+		uv_cpu_hub_info(cpu)->scir.state = value;
+		uv_write_local_mmr8(uv_cpu_hub_info(cpu)->scir.offset, value);
+	}
+}
 
+#endif /* _ASM_X86_UV_UV_HUB_H */
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index dc6b46961523..84367d84bb10 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -10,6 +10,7 @@
 
 #include <linux/kernel.h>
 #include <linux/threads.h>
+#include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
@@ -18,6 +19,8 @@
 #include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/hardirq.h>
+#include <linux/timer.h>
+#include <asm/current.h>
 #include <asm/smp.h>
 #include <asm/ipi.h>
 #include <asm/genapic.h>
@@ -356,6 +359,103 @@ static __init void uv_rtc_init(void)
 		sn_rtc_cycles_per_second = ticks_per_sec;
 }
 
+/*
+ * percpu heartbeat timer
+ */
+static void uv_heartbeat(unsigned long ignored)
+{
+	struct timer_list *timer = &uv_hub_info->scir.timer;
+	unsigned char bits = uv_hub_info->scir.state;
+
+	/* flip heartbeat bit */
+	bits ^= SCIR_CPU_HEARTBEAT;
+
+	/* are we the idle thread? */
+	if (current->pid == 0)
+		bits &= ~SCIR_CPU_ACTIVITY;
+	else
+		bits |= SCIR_CPU_ACTIVITY;
+
+	/* update system controller interface reg */
+	uv_set_scir_bits(bits);
+
+	/* enable next timer period */
+	mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
+}
+
+static void __cpuinit uv_heartbeat_enable(int cpu)
+{
+	if (!uv_cpu_hub_info(cpu)->scir.enabled) {
+		struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
+
+		uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
+		setup_timer(timer, uv_heartbeat, cpu);
+		timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
+		add_timer_on(timer, cpu);
+		uv_cpu_hub_info(cpu)->scir.enabled = 1;
+	}
+
+	/* check boot cpu */
+	if (!uv_cpu_hub_info(0)->scir.enabled)
+		uv_heartbeat_enable(0);
+}
+
+static void __cpuinit uv_heartbeat_disable(int cpu)
+{
+	if (uv_cpu_hub_info(cpu)->scir.enabled) {
+		uv_cpu_hub_info(cpu)->scir.enabled = 0;
+		del_timer(&uv_cpu_hub_info(cpu)->scir.timer);
+	}
+	uv_set_cpu_scir_bits(cpu, 0xff);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * cpu hotplug notifier
+ */
+static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self,
+				       unsigned long action, void *hcpu)
+{
+	long cpu = (long)hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+		uv_heartbeat_enable(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+		uv_heartbeat_disable(cpu);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static __init void uv_scir_register_cpu_notifier(void)
+{
+	hotcpu_notifier(uv_scir_cpu_notify, 0);
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+
+static __init void uv_scir_register_cpu_notifier(void)
+{
+}
+
+static __init int uv_init_heartbeat(void)
+{
+	int cpu;
+
+	if (is_uv_system())
+		for_each_online_cpu(cpu)
+			uv_heartbeat_enable(cpu);
+	return 0;
+}
+
+late_initcall(uv_init_heartbeat);
+
+#endif /* !CONFIG_HOTPLUG_CPU */
+
 /*
  * Called on each cpu to initialize the per_cpu UV data area.
  * 	ZZZ hotplug not supported yet
@@ -452,6 +552,7 @@ void __init uv_system_init(void)
 		uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
 		uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
 		uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
+		uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu;
 		uv_node_to_blade[nid] = blade;
 		uv_cpu_to_blade[cpu] = blade;
 		max_pnode = max(pnode, max_pnode);
@@ -468,4 +569,5 @@ void __init uv_system_init(void)
 	map_mmioh_high(max_pnode);
 
 	uv_cpu_init();
+	uv_scir_register_cpu_notifier();
 }
-- 
cgit v1.2.3-70-g09d2


From 709110bd5624094992579f5311541f2e2b7ce58a Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Thu, 23 Oct 2008 17:14:25 -0700
Subject: x86: signal: cosmetic unification of restore_sigcontext()

Impact: cleanup

Make restore_sigcontext() the same.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 22 ++++++++++++++++++++++
 arch/x86/kernel/signal_64.c | 15 +++++++++++++++
 2 files changed, 37 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index d6dd057d0f22..85a0d37cdae9 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -149,14 +149,36 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
+#ifdef CONFIG_X86_32
 	GET_SEG(gs);
 	COPY_SEG(fs);
 	COPY_SEG(es);
 	COPY_SEG(ds);
+#endif /* CONFIG_X86_32 */
+
 	COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
 	COPY(dx); COPY(cx); COPY(ip);
+
+#ifdef CONFIG_X86_64
+	COPY(r8);
+	COPY(r9);
+	COPY(r10);
+	COPY(r11);
+	COPY(r12);
+	COPY(r13);
+	COPY(r14);
+	COPY(r15);
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_X86_32
 	COPY_SEG_STRICT(cs);
 	COPY_SEG_STRICT(ss);
+#else /* !CONFIG_X86_32 */
+	/* Kernel saves and restores only the CS segment register on signals,
+	 * which is the bare minimum needed to allow mixed 32/64-bit code.
+	 * App's signal handler can save/restore other segments if needed. */
+	COPY_SEG_STRICT(cs);
+#endif /* CONFIG_X86_32 */
 
 	err |= __get_user(tmpflags, &sc->flags);
 	regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index a5c9627f4db9..9c469da7f9e8 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -76,8 +76,17 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
+#ifdef CONFIG_X86_32
+	GET_SEG(gs);
+	COPY_SEG(fs);
+	COPY_SEG(es);
+	COPY_SEG(ds);
+#endif /* CONFIG_X86_32 */
+
 	COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
 	COPY(dx); COPY(cx); COPY(ip);
+
+#ifdef CONFIG_X86_64
 	COPY(r8);
 	COPY(r9);
 	COPY(r10);
@@ -86,11 +95,17 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	COPY(r13);
 	COPY(r14);
 	COPY(r15);
+#endif /* CONFIG_X86_64 */
 
+#ifdef CONFIG_X86_32
+	COPY_SEG_STRICT(cs);
+	COPY_SEG_STRICT(ss);
+#else /* !CONFIG_X86_32 */
 	/* Kernel saves and restores only the CS segment register on signals,
 	 * which is the bare minimum needed to allow mixed 32/64-bit code.
 	 * App's signal handler can save/restore other segments if needed. */
 	COPY_SEG_STRICT(cs);
+#endif /* CONFIG_X86_32 */
 
 	err |= __get_user(tmpflags, &sc->flags);
 	regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
-- 
cgit v1.2.3-70-g09d2


From fd4a2030a358b4818646031049d9631bd45b9915 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Thu, 23 Oct 2008 17:15:28 -0700
Subject: x86: signal_64.c: get_stack() doesn't need entire regs

Impact: cleanup

get_stack() uses sp only, entire regs is not needed.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_64.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 9c469da7f9e8..3d0deb336745 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -199,12 +199,10 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
  */
 
 static void __user *
-get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
+get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size)
 {
-	unsigned long sp;
-
 	/* Default to using normal stack - redzone*/
-	sp = regs->sp - 128;
+	sp -= 128;
 
 	/* This is the X/Open sanctioned signal stack switching.  */
 	if (ka->sa.sa_flags & SA_ONSTACK) {
@@ -224,14 +222,14 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	struct task_struct *me = current;
 
 	if (used_math()) {
-		fp = get_stack(ka, regs, sig_xstate_size);
+		fp = get_stack(ka, regs->sp, sig_xstate_size);
 		frame = (void __user *)round_down(
 			(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
 
 		if (save_i387_xstate(fp) < 0)
 			return -EFAULT;
 	} else
-		frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
+		frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8;
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		return -EFAULT;
-- 
cgit v1.2.3-70-g09d2


From c63dfefd48d92b1db3400fe8de4886a519ac3949 Mon Sep 17 00:00:00 2001
From: Dan McGee <dpmcgee@gmail.com>
Date: Thu, 23 Oct 2008 15:44:02 -0500
Subject: x86: remove dead IRQBALANCE code

Impact: cleanup

CONFIG_IRQBALANCE was removed in commit 8b8e8c1bf; this ifdef was still
around.

Signed-off-by: Dan McGee <dpmcgee@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/irq.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index bae0eda95486..28e409fc73f3 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -31,10 +31,6 @@ static inline int irq_canonicalize(int irq)
 # endif
 #endif
 
-#ifdef CONFIG_IRQBALANCE
-extern int irqbalance_disable(char *str);
-#endif
-
 #ifdef CONFIG_HOTPLUG_CPU
 #include <linux/cpumask.h>
 extern void fixup_irqs(cpumask_t map);
-- 
cgit v1.2.3-70-g09d2


From 04d2aac33eb54fd3084140f2db130530d71e97c6 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sun, 5 Oct 2008 11:08:10 -0700
Subject: x86: corruption-check: fix some style issues

Impact: cleanup

Before moving the code to it's own file, fix some style issues
in the corruption check code.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0fa6790c1dd3..4f38e0305b07 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -662,7 +662,7 @@ static void __init setup_bios_corruption_check(void)
 
 	corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
 
-	while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
+	while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
 		u64 size;
 		addr = find_e820_area_size(addr, &size, PAGE_SIZE);
 
@@ -701,11 +701,11 @@ void check_for_bios_corruption(void)
 	if (!memory_corruption_check)
 		return;
 
-	for(i = 0; i < num_scan_areas; i++) {
+	for (i = 0; i < num_scan_areas; i++) {
 		unsigned long *addr = __va(scan_areas[i].addr);
 		unsigned long size = scan_areas[i].size;
 
-		for(; size; addr++, size -= sizeof(unsigned long)) {
+		for (; size; addr++, size -= sizeof(unsigned long)) {
 			if (!*addr)
 				continue;
 			printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
@@ -721,7 +721,8 @@ void check_for_bios_corruption(void)
 static void periodic_check_for_corruption(unsigned long data)
 {
 	check_for_bios_corruption();
-	mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ));
+	mod_timer(&periodic_check_timer,
+		round_jiffies(jiffies + corruption_check_period*HZ));
 }
 
 void start_periodic_check_for_corruption(void)
-- 
cgit v1.2.3-70-g09d2


From 6784f7d0a5016a397d38be1134e63fc784c1ca8e Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sun, 5 Oct 2008 11:33:42 -0700
Subject: x86: corruption check: move the corruption checks into their own file

Impact: cleanup

The corruption check code is rather sizable and it's likely to grow over
time when we add checks for more types of corruptions (there's a few
candidates in kerneloops.org that I want to add checks for)... so lets move
it to its own file

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/setup.h |   4 ++
 arch/x86/kernel/Makefile     |   1 +
 arch/x86/kernel/check.c      | 158 +++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/setup.c      | 152 -----------------------------------------
 4 files changed, 163 insertions(+), 152 deletions(-)
 create mode 100644 arch/x86/kernel/check.c

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index f12d37237465..1ed8b2e80727 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -8,6 +8,10 @@
 /* Interrupt control for vSMPowered x86_64 systems */
 void vsmp_init(void);
 
+
+void setup_bios_corruption_check(void);
+
+
 #ifdef CONFIG_X86_VISWS
 extern void visws_early_detect(void);
 extern int is_visws_box(void);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d7e5a58ee22f..31fbcaf3df70 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -35,6 +35,7 @@ obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
 obj-y			+= alternative.o i8253.o pci-nommu.o
 obj-y			+= tsc.o io_delay.o rtc.o
+obj-y			+= check.o
 
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
 obj-y				+= process.o
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
new file mode 100644
index 000000000000..5056703e1b05
--- /dev/null
+++ b/arch/x86/kernel/check.c
@@ -0,0 +1,158 @@
+#include <linux/module.h>
+#include <linux/sched.h>
+
+#include <asm/e820.h>
+#include <asm/proto.h>
+
+/*
+ * Some BIOSes seem to corrupt the low 64k of memory during events
+ * like suspend/resume and unplugging an HDMI cable.  Reserve all
+ * remaining free memory in that area and fill it with a distinct
+ * pattern.
+ */
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+#define MAX_SCAN_AREAS	8
+
+static int __read_mostly memory_corruption_check = -1;
+
+static unsigned __read_mostly corruption_check_size = 64*1024;
+static unsigned __read_mostly corruption_check_period = 60; /* seconds */
+
+static struct e820entry scan_areas[MAX_SCAN_AREAS];
+static int num_scan_areas;
+
+
+static int set_corruption_check(char *arg)
+{
+	char *end;
+
+	memory_corruption_check = simple_strtol(arg, &end, 10);
+
+	return (*end == 0) ? 0 : -EINVAL;
+}
+early_param("memory_corruption_check", set_corruption_check);
+
+static int set_corruption_check_period(char *arg)
+{
+	char *end;
+
+	corruption_check_period = simple_strtoul(arg, &end, 10);
+
+	return (*end == 0) ? 0 : -EINVAL;
+}
+early_param("memory_corruption_check_period", set_corruption_check_period);
+
+static int set_corruption_check_size(char *arg)
+{
+	char *end;
+	unsigned size;
+
+	size = memparse(arg, &end);
+
+	if (*end == '\0')
+		corruption_check_size = size;
+
+	return (size == corruption_check_size) ? 0 : -EINVAL;
+}
+early_param("memory_corruption_check_size", set_corruption_check_size);
+
+
+void __init setup_bios_corruption_check(void)
+{
+	u64 addr = PAGE_SIZE;	/* assume first page is reserved anyway */
+
+	if (memory_corruption_check == -1) {
+		memory_corruption_check =
+#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
+			1
+#else
+			0
+#endif
+			;
+	}
+
+	if (corruption_check_size == 0)
+		memory_corruption_check = 0;
+
+	if (!memory_corruption_check)
+		return;
+
+	corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
+
+	while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
+		u64 size;
+		addr = find_e820_area_size(addr, &size, PAGE_SIZE);
+
+		if (addr == 0)
+			break;
+
+		if ((addr + size) > corruption_check_size)
+			size = corruption_check_size - addr;
+
+		if (size == 0)
+			break;
+
+		e820_update_range(addr, size, E820_RAM, E820_RESERVED);
+		scan_areas[num_scan_areas].addr = addr;
+		scan_areas[num_scan_areas].size = size;
+		num_scan_areas++;
+
+		/* Assume we've already mapped this early memory */
+		memset(__va(addr), 0, size);
+
+		addr += size;
+	}
+
+	printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
+	       num_scan_areas);
+	update_e820();
+}
+
+static struct timer_list periodic_check_timer;
+
+void check_for_bios_corruption(void)
+{
+	int i;
+	int corruption = 0;
+
+	if (!memory_corruption_check)
+		return;
+
+	for (i = 0; i < num_scan_areas; i++) {
+		unsigned long *addr = __va(scan_areas[i].addr);
+		unsigned long size = scan_areas[i].size;
+
+		for (; size; addr++, size -= sizeof(unsigned long)) {
+			if (!*addr)
+				continue;
+			printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
+			       addr, __pa(addr), *addr);
+			corruption = 1;
+			*addr = 0;
+		}
+	}
+
+	WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n");
+}
+
+static void periodic_check_for_corruption(unsigned long data)
+{
+	check_for_bios_corruption();
+	mod_timer(&periodic_check_timer,
+		round_jiffies(jiffies + corruption_check_period*HZ));
+}
+
+void start_periodic_check_for_corruption(void)
+{
+	if (!memory_corruption_check || corruption_check_period == 0)
+		return;
+
+	printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
+	       corruption_check_period);
+
+	init_timer(&periodic_check_timer);
+	periodic_check_timer.function = &periodic_check_for_corruption;
+	periodic_check_for_corruption(0);
+}
+#endif
+
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 4f38e0305b07..af690aa593a9 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -587,158 +587,6 @@ static struct x86_quirks default_x86_quirks __initdata;
 
 struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
 
-/*
- * Some BIOSes seem to corrupt the low 64k of memory during events
- * like suspend/resume and unplugging an HDMI cable.  Reserve all
- * remaining free memory in that area and fill it with a distinct
- * pattern.
- */
-#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
-#define MAX_SCAN_AREAS	8
-
-static int __read_mostly memory_corruption_check = -1;
-
-static unsigned __read_mostly corruption_check_size = 64*1024;
-static unsigned __read_mostly corruption_check_period = 60; /* seconds */
-
-static struct e820entry scan_areas[MAX_SCAN_AREAS];
-static int num_scan_areas;
-
-
-static int set_corruption_check(char *arg)
-{
-	char *end;
-
-	memory_corruption_check = simple_strtol(arg, &end, 10);
-
-	return (*end == 0) ? 0 : -EINVAL;
-}
-early_param("memory_corruption_check", set_corruption_check);
-
-static int set_corruption_check_period(char *arg)
-{
-	char *end;
-
-	corruption_check_period = simple_strtoul(arg, &end, 10);
-
-	return (*end == 0) ? 0 : -EINVAL;
-}
-early_param("memory_corruption_check_period", set_corruption_check_period);
-
-static int set_corruption_check_size(char *arg)
-{
-	char *end;
-	unsigned size;
-
-	size = memparse(arg, &end);
-
-	if (*end == '\0')
-		corruption_check_size = size;
-
-	return (size == corruption_check_size) ? 0 : -EINVAL;
-}
-early_param("memory_corruption_check_size", set_corruption_check_size);
-
-
-static void __init setup_bios_corruption_check(void)
-{
-	u64 addr = PAGE_SIZE;	/* assume first page is reserved anyway */
-
-	if (memory_corruption_check == -1) {
-		memory_corruption_check =
-#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
-			1
-#else
-			0
-#endif
-			;
-	}
-
-	if (corruption_check_size == 0)
-		memory_corruption_check = 0;
-
-	if (!memory_corruption_check)
-		return;
-
-	corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
-
-	while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
-		u64 size;
-		addr = find_e820_area_size(addr, &size, PAGE_SIZE);
-
-		if (addr == 0)
-			break;
-
-		if ((addr + size) > corruption_check_size)
-			size = corruption_check_size - addr;
-
-		if (size == 0)
-			break;
-
-		e820_update_range(addr, size, E820_RAM, E820_RESERVED);
-		scan_areas[num_scan_areas].addr = addr;
-		scan_areas[num_scan_areas].size = size;
-		num_scan_areas++;
-
-		/* Assume we've already mapped this early memory */
-		memset(__va(addr), 0, size);
-
-		addr += size;
-	}
-
-	printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
-	       num_scan_areas);
-	update_e820();
-}
-
-static struct timer_list periodic_check_timer;
-
-void check_for_bios_corruption(void)
-{
-	int i;
-	int corruption = 0;
-
-	if (!memory_corruption_check)
-		return;
-
-	for (i = 0; i < num_scan_areas; i++) {
-		unsigned long *addr = __va(scan_areas[i].addr);
-		unsigned long size = scan_areas[i].size;
-
-		for (; size; addr++, size -= sizeof(unsigned long)) {
-			if (!*addr)
-				continue;
-			printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
-			       addr, __pa(addr), *addr);
-			corruption = 1;
-			*addr = 0;
-		}
-	}
-
-	WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n");
-}
-
-static void periodic_check_for_corruption(unsigned long data)
-{
-	check_for_bios_corruption();
-	mod_timer(&periodic_check_timer,
-		round_jiffies(jiffies + corruption_check_period*HZ));
-}
-
-void start_periodic_check_for_corruption(void)
-{
-	if (!memory_corruption_check || corruption_check_period == 0)
-		return;
-
-	printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
-	       corruption_check_period);
-
-	init_timer(&periodic_check_timer);
-	periodic_check_timer.function = &periodic_check_for_corruption;
-	periodic_check_for_corruption(0);
-}
-#endif
-
 static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
 {
 	printk(KERN_NOTICE
-- 
cgit v1.2.3-70-g09d2


From 304e629bf4a3150a0bf6556fc45c52c5c082340f Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Sun, 5 Oct 2008 12:09:03 -0700
Subject: x86: corruption check: run the corruption checks from a work queue

Impact: change the implementation of the debug feature

the periodic corruption checks are better off run from a work queue; there's
nothing time critical about them and this way the amount of
interrupt-context work is reduced.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/check.c | 27 +++++++++++++++++----------
 arch/x86/mm/init_32.c   |  2 --
 arch/x86/mm/init_64.c   |  2 --
 3 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 5056703e1b05..55eed1752b43 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -1,6 +1,7 @@
 #include <linux/module.h>
 #include <linux/sched.h>
-
+#include <linux/kthread.h>
+#include <linux/workqueue.h>
 #include <asm/e820.h>
 #include <asm/proto.h>
 
@@ -108,13 +109,14 @@ void __init setup_bios_corruption_check(void)
 	update_e820();
 }
 
-static struct timer_list periodic_check_timer;
 
 void check_for_bios_corruption(void)
 {
 	int i;
 	int corruption = 0;
 
+	printk("dot\n");
+
 	if (!memory_corruption_check)
 		return;
 
@@ -135,24 +137,29 @@ void check_for_bios_corruption(void)
 	WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n");
 }
 
-static void periodic_check_for_corruption(unsigned long data)
+static void check_corruption(struct work_struct *dummy);
+static DECLARE_DELAYED_WORK(bios_check_work, check_corruption);
+
+static void check_corruption(struct work_struct *dummy)
 {
 	check_for_bios_corruption();
-	mod_timer(&periodic_check_timer,
-		round_jiffies(jiffies + corruption_check_period*HZ));
+	schedule_delayed_work(&bios_check_work,
+		round_jiffies_relative(corruption_check_period*HZ)); 
 }
 
-void start_periodic_check_for_corruption(void)
+static int start_periodic_check_for_corruption(void)
 {
 	if (!memory_corruption_check || corruption_check_period == 0)
-		return;
+		return 0;
 
 	printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
 	       corruption_check_period);
 
-	init_timer(&periodic_check_timer);
-	periodic_check_timer.function = &periodic_check_for_corruption;
-	periodic_check_for_corruption(0);
+	/* First time we run the checks right away */
+	schedule_delayed_work(&bios_check_work, 0);
+	return 0;
 }
+
+module_init(start_periodic_check_for_corruption);
 #endif
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8396868e82c5..5e6377560ff1 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -970,8 +970,6 @@ void __init mem_init(void)
 	int codesize, reservedpages, datasize, initsize;
 	int tmp;
 
-	start_periodic_check_for_corruption();
-
 #ifdef CONFIG_FLATMEM
 	BUG_ON(!mem_map);
 #endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index b8e461d49412..d6ef1589b95a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -879,8 +879,6 @@ void __init mem_init(void)
 {
 	long codesize, reservedpages, datasize, initsize;
 
-	start_periodic_check_for_corruption();
-
 	pci_iommu_alloc();
 
 	/* clear_bss() already clear the empty_zero_page */
-- 
cgit v1.2.3-70-g09d2


From b43d196c4d3fe46d6dda7c987c47792612b80b1b Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Sun, 5 Oct 2008 12:21:32 -0700
Subject: x86: corruption-check: some post-move cleanups

Impact: cleanup

now that the code is moved and converted to a work queue,
there's some minor cleanups that can be done.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile |  3 ++-
 arch/x86/kernel/check.c  | 12 ++++--------
 2 files changed, 6 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 31fbcaf3df70..f63a8034fb82 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -35,7 +35,6 @@ obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
 obj-y			+= alternative.o i8253.o pci-nommu.o
 obj-y			+= tsc.o io_delay.o rtc.o
-obj-y			+= check.o
 
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
 obj-y				+= process.o
@@ -105,6 +104,8 @@ microcode-$(CONFIG_MICROCODE_INTEL)	+= microcode_intel.o
 microcode-$(CONFIG_MICROCODE_AMD)	+= microcode_amd.o
 obj-$(CONFIG_MICROCODE)			+= microcode.o
 
+obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 55eed1752b43..2ac0ab71412a 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -11,7 +11,6 @@
  * remaining free memory in that area and fill it with a distinct
  * pattern.
  */
-#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
 #define MAX_SCAN_AREAS	8
 
 static int __read_mostly memory_corruption_check = -1;
@@ -23,7 +22,7 @@ static struct e820entry scan_areas[MAX_SCAN_AREAS];
 static int num_scan_areas;
 
 
-static int set_corruption_check(char *arg)
+static __init int set_corruption_check(char *arg)
 {
 	char *end;
 
@@ -33,7 +32,7 @@ static int set_corruption_check(char *arg)
 }
 early_param("memory_corruption_check", set_corruption_check);
 
-static int set_corruption_check_period(char *arg)
+static __init int set_corruption_check_period(char *arg)
 {
 	char *end;
 
@@ -43,7 +42,7 @@ static int set_corruption_check_period(char *arg)
 }
 early_param("memory_corruption_check_period", set_corruption_check_period);
 
-static int set_corruption_check_size(char *arg)
+static __init int set_corruption_check_size(char *arg)
 {
 	char *end;
 	unsigned size;
@@ -115,8 +114,6 @@ void check_for_bios_corruption(void)
 	int i;
 	int corruption = 0;
 
-	printk("dot\n");
-
 	if (!memory_corruption_check)
 		return;
 
@@ -134,7 +131,7 @@ void check_for_bios_corruption(void)
 		}
 	}
 
-	WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n");
+	WARN_ONCE(corruption, KERN_ERR "Memory corruption detected in low memory\n");
 }
 
 static void check_corruption(struct work_struct *dummy);
@@ -161,5 +158,4 @@ static int start_periodic_check_for_corruption(void)
 }
 
 module_init(start_periodic_check_for_corruption);
-#endif
 
-- 
cgit v1.2.3-70-g09d2


From 6f290b4e016d6c61511542cf6d9ebdef1965978e Mon Sep 17 00:00:00 2001
From: Aristeu Rozanski <aris@redhat.com>
Date: Mon, 27 Oct 2008 12:42:34 -0400
Subject: x86, NMI watchdog: add support to enable and disable IOAPIC NMI

Impact: change/improve the way /proc/sys/kernel/nmi_watchdog works

This patch adds support to enable/disable IOAPIC NMI watchdog in runtime via
procfs.

Signed-off-by: Aristeu Rozanski <aris@redhat.com>
Cc: "Maciej W. Rozycki" <macro@linux-mips.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/nmi.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 2c97f07f1c2c..2c005fac6171 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -340,6 +340,8 @@ void stop_apic_nmi_watchdog(void *unused)
 		return;
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		lapic_watchdog_stop();
+	else
+		__acpi_nmi_disable(NULL);
 	__get_cpu_var(wd_enabled) = 0;
 	atomic_dec(&nmi_active);
 }
@@ -465,6 +467,24 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 
 #ifdef CONFIG_SYSCTL
 
+static void enable_ioapic_nmi_watchdog_single(void *unused)
+{
+	__get_cpu_var(wd_enabled) = 1;
+	atomic_inc(&nmi_active);
+	__acpi_nmi_enable(NULL);
+}
+
+static void enable_ioapic_nmi_watchdog(void)
+{
+	on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
+	touch_nmi_watchdog();
+}
+
+static void disable_ioapic_nmi_watchdog(void)
+{
+	on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
+}
+
 static int __init setup_unknown_nmi_panic(char *str)
 {
 	unknown_nmi_panic = 1;
@@ -507,6 +527,11 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
 			enable_lapic_nmi_watchdog();
 		else
 			disable_lapic_nmi_watchdog();
+	} else if (nmi_watchdog == NMI_IO_APIC) {
+		if (nmi_watchdog_enabled)
+			enable_ioapic_nmi_watchdog();
+		else
+			disable_ioapic_nmi_watchdog();
 	} else {
 		printk(KERN_WARNING
 			"NMI watchdog doesn't know what hardware to touch\n");
-- 
cgit v1.2.3-70-g09d2


From 7d5a78cd98c3a5eb83bd6a061c5ea6ef1e9b8fcb Mon Sep 17 00:00:00 2001
From: Aristeu Rozanski <aris@redhat.com>
Date: Mon, 27 Oct 2008 12:42:35 -0400
Subject: x86, NMI watchdog: disable NMIs on LVT0 in case NMI watchdog is not
 working

Impact: change NMI watchdog detection and disabling sequence

Currently, if the NMI watchdog fails using IOAPIC method, it'll only disable
interrupts on 8259 if the timer is passing thru it. This patch disables
NMI delivery on LINT0 if the NMI watchdog initial test fails, just for safety.

Signed-off-by: Aristeu Rozanski <aris@redhat.com>
Cc: "Maciej W. Rozycki" <macro@linux-mips.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/nmi.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 2c005fac6171..13316cf57cdb 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -131,6 +131,11 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count)
 	atomic_dec(&nmi_active);
 }
 
+static void __acpi_nmi_disable(void *__unused)
+{
+	apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
+}
+
 int __init check_nmi_watchdog(void)
 {
 	unsigned int *prev_nmi_count;
@@ -179,8 +184,12 @@ int __init check_nmi_watchdog(void)
 	kfree(prev_nmi_count);
 	return 0;
 error:
-	if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259)
-		disable_8259A_irq(0);
+	if (nmi_watchdog == NMI_IO_APIC) {
+		if (!timer_through_8259)
+			disable_8259A_irq(0);
+		on_each_cpu(__acpi_nmi_disable, NULL, 1);
+	}
+
 #ifdef CONFIG_X86_32
 	timer_ack = 0;
 #endif
@@ -285,11 +294,6 @@ void acpi_nmi_enable(void)
 		on_each_cpu(__acpi_nmi_enable, NULL, 1);
 }
 
-static void __acpi_nmi_disable(void *__unused)
-{
-	apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
-}
-
 /*
  * Disable timer based NMIs on all CPUs:
  */
-- 
cgit v1.2.3-70-g09d2


From 878719e831d9e076961aa15d4049a57a6668c67a Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Thu, 23 Oct 2008 10:40:06 -0400
Subject: x86: unify appropriate bits from dumpstack_32 and dumpstack_64

Impact: cleanup

As promised, now that dumpstack_32 and dumpstack_64 have so many bits
in common, we should merge the in-sync bits into a common file, to
prevent them from diverging again.

This patch removes bits which are common between dumpstack_32.c and
dumpstack_64.c and places them in a common dumpstack.c which is built
for both 32 and 64 bit arches.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

 Makefile       |    2
 arch/x86/kernel/Makefile       |    2
 arch/x86/kernel/Makefile       |    2
 arch/x86/kernel/Makefile       |    2
 arch/x86/kernel/Makefile       |    2
 arch/x86/kernel/Makefile       |    2
 arch/x86/kernel/dumpstack.c    |  319 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/dumpstack.h    |   39 +++++
 arch/x86/kernel/dumpstack_32.c |  294 -------------------------------------
 arch/x86/kernel/dumpstack_64.c |  285 ------------------------------------
 5 files changed, 363 insertions(+), 576 deletions(-)
---
 arch/x86/kernel/Makefile       |   2 +-
 arch/x86/kernel/dumpstack.c    | 319 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/dumpstack.h    |  39 +++++
 arch/x86/kernel/dumpstack_32.c | 294 +------------------------------------
 arch/x86/kernel/dumpstack_64.c | 285 +-----------------------------------
 5 files changed, 363 insertions(+), 576 deletions(-)
 create mode 100644 arch/x86/kernel/dumpstack.c
 create mode 100644 arch/x86/kernel/dumpstack.h

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d7e5a58ee22f..db3216a9d2b9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -24,7 +24,7 @@ CFLAGS_tsc.o		:= $(nostackp)
 
 obj-y			:= process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
-obj-y			+= time_$(BITS).o ioport.o ldt.o
+obj-y			+= time_$(BITS).o ioport.o ldt.o dumpstack.o
 obj-y			+= setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
 obj-$(CONFIG_X86_VISWS)	+= visws_quirks.o
 obj-$(CONFIG_X86_32)	+= probe_roms_32.o
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
new file mode 100644
index 000000000000..5962176dfabb
--- /dev/null
+++ b/arch/x86/kernel/dumpstack.c
@@ -0,0 +1,319 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <linux/utsname.h>
+#include <linux/hardirq.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/kexec.h>
+#include <linux/bug.h>
+#include <linux/nmi.h>
+#include <linux/sysfs.h>
+
+#include <asm/stacktrace.h>
+
+#include "dumpstack.h"
+
+int panic_on_unrecovered_nmi;
+unsigned int code_bytes = 64;
+int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
+static int die_counter;
+
+void printk_address(unsigned long address, int reliable)
+{
+	printk(" [<%p>] %s%pS\n", (void *) address,
+			reliable ? "" : "? ", (void *) address);
+}
+
+/*
+ * x86-64 can have up to three kernel stacks:
+ * process stack
+ * interrupt stack
+ * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
+ */
+
+static inline int valid_stack_ptr(struct thread_info *tinfo,
+			void *p, unsigned int size, void *end)
+{
+	void *t = tinfo;
+	if (end) {
+		if (p < end && p >= (end-THREAD_SIZE))
+			return 1;
+		else
+			return 0;
+	}
+	return p > t && p < t + THREAD_SIZE - size;
+}
+
+unsigned long
+print_context_stack(struct thread_info *tinfo,
+		unsigned long *stack, unsigned long bp,
+		const struct stacktrace_ops *ops, void *data,
+		unsigned long *end)
+{
+	struct stack_frame *frame = (struct stack_frame *)bp;
+
+	while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
+		unsigned long addr;
+
+		addr = *stack;
+		if (__kernel_text_address(addr)) {
+			if ((unsigned long) stack == bp + sizeof(long)) {
+				ops->address(data, addr, 1);
+				frame = frame->next_frame;
+				bp = (unsigned long) frame;
+			} else {
+				ops->address(data, addr, bp == 0);
+			}
+		}
+		stack++;
+	}
+	return bp;
+}
+
+
+static void
+print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
+{
+	printk(data);
+	print_symbol(msg, symbol);
+	printk("\n");
+}
+
+static void print_trace_warning(void *data, char *msg)
+{
+	printk("%s%s\n", (char *)data, msg);
+}
+
+static int print_trace_stack(void *data, char *name)
+{
+	printk("%s <%s> ", (char *)data, name);
+	return 0;
+}
+
+/*
+ * Print one address/symbol entries per line.
+ */
+static void print_trace_address(void *data, unsigned long addr, int reliable)
+{
+	touch_nmi_watchdog();
+	printk(data);
+	printk_address(addr, reliable);
+}
+
+static const struct stacktrace_ops print_trace_ops = {
+	.warning = print_trace_warning,
+	.warning_symbol = print_trace_warning_symbol,
+	.stack = print_trace_stack,
+	.address = print_trace_address,
+};
+
+void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+		unsigned long *stack, unsigned long bp, char *log_lvl)
+{
+	printk("%sCall Trace:\n", log_lvl);
+	dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
+}
+
+void show_trace(struct task_struct *task, struct pt_regs *regs,
+		unsigned long *stack, unsigned long bp)
+{
+	show_trace_log_lvl(task, regs, stack, bp, "");
+}
+
+void show_stack(struct task_struct *task, unsigned long *sp)
+{
+	show_stack_log_lvl(task, NULL, sp, 0, "");
+}
+
+/*
+ * The architecture-independent dump_stack generator
+ */
+void dump_stack(void)
+{
+	unsigned long bp = 0;
+	unsigned long stack;
+
+#ifdef CONFIG_FRAME_POINTER
+	if (!bp)
+		get_bp(bp);
+#endif
+
+	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+		current->pid, current->comm, print_tainted(),
+		init_utsname()->release,
+		(int)strcspn(init_utsname()->version, " "),
+		init_utsname()->version);
+	show_trace(NULL, NULL, &stack, bp);
+}
+EXPORT_SYMBOL(dump_stack);
+
+static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static int die_owner = -1;
+static unsigned int die_nest_count;
+
+unsigned __kprobes long oops_begin(void)
+{
+	int cpu;
+	unsigned long flags;
+
+	oops_enter();
+
+	/* racy, but better than risking deadlock. */
+	raw_local_irq_save(flags);
+	cpu = smp_processor_id();
+	if (!__raw_spin_trylock(&die_lock)) {
+		if (cpu == die_owner)
+			/* nested oops. should stop eventually */;
+		else
+			__raw_spin_lock(&die_lock);
+	}
+	die_nest_count++;
+	die_owner = cpu;
+	console_verbose();
+	bust_spinlocks(1);
+	return flags;
+}
+
+void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+{
+	if (regs && kexec_should_crash(current))
+		crash_kexec(regs);
+
+	bust_spinlocks(0);
+	die_owner = -1;
+	add_taint(TAINT_DIE);
+	die_nest_count--;
+	if (!die_nest_count)
+		/* Nest count reaches zero, release the lock. */
+		__raw_spin_unlock(&die_lock);
+	raw_local_irq_restore(flags);
+	oops_exit();
+
+	if (!signr)
+		return;
+	if (in_interrupt())
+		panic("Fatal exception in interrupt");
+	if (panic_on_oops)
+		panic("Fatal exception");
+	do_exit(signr);
+}
+
+int __kprobes __die(const char *str, struct pt_regs *regs, long err)
+{
+#ifdef CONFIG_X86_32
+	unsigned short ss;
+	unsigned long sp;
+#endif
+	printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
+#ifdef CONFIG_PREEMPT
+	printk("PREEMPT ");
+#endif
+#ifdef CONFIG_SMP
+	printk("SMP ");
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	printk("DEBUG_PAGEALLOC");
+#endif
+	printk("\n");
+	sysfs_printk_last_file();
+	if (notify_die(DIE_OOPS, str, regs, err,
+			current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
+		return 1;
+
+	show_registers(regs);
+#ifdef CONFIG_X86_32
+	sp = (unsigned long) (&regs->sp);
+	savesegment(ss, ss);
+	if (user_mode(regs)) {
+		sp = regs->sp;
+		ss = regs->ss & 0xffff;
+	}
+	printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
+	print_symbol("%s", regs->ip);
+	printk(" SS:ESP %04x:%08lx\n", ss, sp);
+#else
+	/* Executive summary in case the oops scrolled away */
+	printk(KERN_ALERT "RIP ");
+	printk_address(regs->ip, 1);
+	printk(" RSP <%016lx>\n", regs->sp);
+#endif
+	return 0;
+}
+
+/*
+ * This is gone through when something in the kernel has done something bad
+ * and is about to be terminated:
+ */
+void die(const char *str, struct pt_regs *regs, long err)
+{
+	unsigned long flags = oops_begin();
+	int sig = SIGSEGV;
+
+	if (!user_mode_vm(regs))
+		report_bug(regs->ip, regs);
+
+	if (__die(str, regs, err))
+		sig = 0;
+	oops_end(flags, regs, sig);
+}
+
+void notrace __kprobes
+die_nmi(char *str, struct pt_regs *regs, int do_panic)
+{
+	unsigned long flags;
+
+	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
+		return;
+
+	/*
+	 * We are in trouble anyway, lets at least try
+	 * to get a message out.
+	 */
+	flags = oops_begin();
+	printk(KERN_EMERG "%s", str);
+	printk(" on CPU%d, ip %08lx, registers:\n",
+		smp_processor_id(), regs->ip);
+	show_registers(regs);
+	oops_end(flags, regs, 0);
+	if (do_panic || panic_on_oops)
+		panic("Non maskable interrupt");
+	nmi_exit();
+	local_irq_enable();
+	do_exit(SIGBUS);
+}
+
+static int __init oops_setup(char *s)
+{
+	if (!s)
+		return -EINVAL;
+	if (!strcmp(s, "panic"))
+		panic_on_oops = 1;
+	return 0;
+}
+early_param("oops", oops_setup);
+
+static int __init kstack_setup(char *s)
+{
+	if (!s)
+		return -EINVAL;
+	kstack_depth_to_print = simple_strtoul(s, NULL, 0);
+	return 0;
+}
+early_param("kstack", kstack_setup);
+
+static int __init code_bytes_setup(char *s)
+{
+	code_bytes = simple_strtoul(s, NULL, 0);
+	if (code_bytes > 8192)
+		code_bytes = 8192;
+
+	return 1;
+}
+__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
new file mode 100644
index 000000000000..3119a801c32b
--- /dev/null
+++ b/arch/x86/kernel/dumpstack.h
@@ -0,0 +1,39 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+
+#ifndef DUMPSTACK_H
+#define DUMPSTACK_H
+
+#ifdef CONFIG_X86_32
+#define STACKSLOTS_PER_LINE 8
+#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
+#else
+#define STACKSLOTS_PER_LINE 4
+#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
+#endif
+
+extern unsigned long
+print_context_stack(struct thread_info *tinfo,
+		unsigned long *stack, unsigned long bp,
+		const struct stacktrace_ops *ops, void *data,
+		unsigned long *end);
+
+extern void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+		unsigned long *stack, unsigned long bp, char *log_lvl);
+
+extern void
+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+		unsigned long *sp, unsigned long bp, char *log_lvl);
+
+extern unsigned int code_bytes;
+extern int kstack_depth_to_print;
+
+/* The form of the top of the frame on the stack */
+struct stack_frame {
+	struct stack_frame *next_frame;
+	unsigned long return_address;
+};
+#endif
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index f2046c5752d0..7b031b106ec8 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,64 +17,7 @@
 
 #include <asm/stacktrace.h>
 
-#define STACKSLOTS_PER_LINE 8
-#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
-
-int panic_on_unrecovered_nmi;
-int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
-static unsigned int code_bytes = 64;
-static int die_counter;
-
-void printk_address(unsigned long address, int reliable)
-{
-	printk(" [<%p>] %s%pS\n", (void *) address,
-			reliable ? "" : "? ", (void *) address);
-}
-
-static inline int valid_stack_ptr(struct thread_info *tinfo,
-			void *p, unsigned int size, void *end)
-{
-	void *t = tinfo;
-	if (end) {
-		if (p < end && p >= (end-THREAD_SIZE))
-			return 1;
-		else
-			return 0;
-	}
-	return p > t && p < t + THREAD_SIZE - size;
-}
-
-/* The form of the top of the frame on the stack */
-struct stack_frame {
-	struct stack_frame *next_frame;
-	unsigned long return_address;
-};
-
-static inline unsigned long
-print_context_stack(struct thread_info *tinfo,
-		unsigned long *stack, unsigned long bp,
-		const struct stacktrace_ops *ops, void *data,
-		unsigned long *end)
-{
-	struct stack_frame *frame = (struct stack_frame *)bp;
-
-	while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
-		unsigned long addr;
-
-		addr = *stack;
-		if (__kernel_text_address(addr)) {
-			if ((unsigned long) stack == bp + sizeof(long)) {
-				ops->address(data, addr, 1);
-				frame = frame->next_frame;
-				bp = (unsigned long) frame;
-			} else {
-				ops->address(data, addr, bp == 0);
-			}
-		}
-		stack++;
-	}
-	return bp;
-}
+#include "dumpstack.h"
 
 void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		unsigned long *stack, unsigned long bp,
@@ -119,57 +62,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 }
 EXPORT_SYMBOL(dump_trace);
 
-static void
-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
-	printk(data);
-	print_symbol(msg, symbol);
-	printk("\n");
-}
-
-static void print_trace_warning(void *data, char *msg)
-{
-	printk("%s%s\n", (char *)data, msg);
-}
-
-static int print_trace_stack(void *data, char *name)
-{
-	printk("%s <%s> ", (char *)data, name);
-	return 0;
-}
-
-/*
- * Print one address/symbol entries per line.
- */
-static void print_trace_address(void *data, unsigned long addr, int reliable)
-{
-	touch_nmi_watchdog();
-	printk(data);
-	printk_address(addr, reliable);
-}
-
-static const struct stacktrace_ops print_trace_ops = {
-	.warning = print_trace_warning,
-	.warning_symbol = print_trace_warning_symbol,
-	.stack = print_trace_stack,
-	.address = print_trace_address,
-};
-
-static void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp, char *log_lvl)
-{
-	printk("%sCall Trace:\n", log_lvl);
-	dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
-}
-
-void show_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp)
-{
-	show_trace_log_lvl(task, regs, stack, bp, "");
-}
-
-static void
+void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		unsigned long *sp, unsigned long bp, char *log_lvl)
 {
@@ -196,33 +89,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
 }
 
-void show_stack(struct task_struct *task, unsigned long *sp)
-{
-	show_stack_log_lvl(task, NULL, sp, 0, "");
-}
-
-/*
- * The architecture-independent dump_stack generator
- */
-void dump_stack(void)
-{
-	unsigned long bp = 0;
-	unsigned long stack;
-
-#ifdef CONFIG_FRAME_POINTER
-	if (!bp)
-		get_bp(bp);
-#endif
-
-	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
-		current->pid, current->comm, print_tainted(),
-		init_utsname()->release,
-		(int)strcspn(init_utsname()->version, " "),
-		init_utsname()->version);
-	show_trace(NULL, NULL, &stack, bp);
-}
-
-EXPORT_SYMBOL(dump_stack);
 
 void show_registers(struct pt_regs *regs)
 {
@@ -283,159 +149,3 @@ int is_valid_bugaddr(unsigned long ip)
 	return ud2 == 0x0b0f;
 }
 
-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
-static int die_owner = -1;
-static unsigned int die_nest_count;
-
-unsigned __kprobes long oops_begin(void)
-{
-	int cpu;
-	unsigned long flags;
-
-	oops_enter();
-
-	/* racy, but better than risking deadlock. */
-	raw_local_irq_save(flags);
-	cpu = smp_processor_id();
-	if (!__raw_spin_trylock(&die_lock)) {
-		if (cpu == die_owner)
-			/* nested oops. should stop eventually */;
-		else
-			__raw_spin_lock(&die_lock);
-	}
-	die_nest_count++;
-	die_owner = cpu;
-	console_verbose();
-	bust_spinlocks(1);
-	return flags;
-}
-
-void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
-{
-	if (regs && kexec_should_crash(current))
-		crash_kexec(regs);
-
-	bust_spinlocks(0);
-	die_owner = -1;
-	add_taint(TAINT_DIE);
-	die_nest_count--;
-	if (!die_nest_count)
-		/* Nest count reaches zero, release the lock. */
-		__raw_spin_unlock(&die_lock);
-	raw_local_irq_restore(flags);
-	oops_exit();
-
-	if (!signr)
-		return;
-	if (in_interrupt())
-		panic("Fatal exception in interrupt");
-	if (panic_on_oops)
-		panic("Fatal exception");
-	do_exit(signr);
-}
-
-int __kprobes __die(const char *str, struct pt_regs *regs, long err)
-{
-	unsigned short ss;
-	unsigned long sp;
-
-	printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
-#ifdef CONFIG_PREEMPT
-	printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
-	printk("SMP ");
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
-	printk("DEBUG_PAGEALLOC");
-#endif
-	printk("\n");
-	sysfs_printk_last_file();
-	if (notify_die(DIE_OOPS, str, regs, err,
-			current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
-		return 1;
-
-	show_registers(regs);
-	/* Executive summary in case the oops scrolled away */
-	sp = (unsigned long) (&regs->sp);
-	savesegment(ss, ss);
-	if (user_mode(regs)) {
-		sp = regs->sp;
-		ss = regs->ss & 0xffff;
-	}
-	printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
-	print_symbol("%s", regs->ip);
-	printk(" SS:ESP %04x:%08lx\n", ss, sp);
-	return 0;
-}
-
-/*
- * This is gone through when something in the kernel has done something bad
- * and is about to be terminated:
- */
-void die(const char *str, struct pt_regs *regs, long err)
-{
-	unsigned long flags = oops_begin();
-	int sig = SIGSEGV;
-
-	if (!user_mode_vm(regs))
-		report_bug(regs->ip, regs);
-
-	if (__die(str, regs, err))
-		sig = 0;
-	oops_end(flags, regs, sig);
-}
-
-void notrace __kprobes
-die_nmi(char *str, struct pt_regs *regs, int do_panic)
-{
-	unsigned long flags;
-
-	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
-		return;
-
-	/*
-	 * We are in trouble anyway, lets at least try
-	 * to get a message out.
-	 */
-	flags = oops_begin();
-	printk(KERN_EMERG "%s", str);
-	printk(" on CPU%d, ip %08lx, registers:\n",
-		smp_processor_id(), regs->ip);
-	show_registers(regs);
-	oops_end(flags, regs, 0);
-	if (do_panic || panic_on_oops)
-		panic("Non maskable interrupt");
-	nmi_exit();
-	local_irq_enable();
-	do_exit(SIGBUS);
-}
-
-static int __init oops_setup(char *s)
-{
-	if (!s)
-		return -EINVAL;
-	if (!strcmp(s, "panic"))
-		panic_on_oops = 1;
-	return 0;
-}
-early_param("oops", oops_setup);
-
-static int __init kstack_setup(char *s)
-{
-	if (!s)
-		return -EINVAL;
-	kstack_depth_to_print = simple_strtoul(s, NULL, 0);
-	return 0;
-}
-early_param("kstack", kstack_setup);
-
-static int __init code_bytes_setup(char *s)
-{
-	code_bytes = simple_strtoul(s, NULL, 0);
-	if (code_bytes > 8192)
-		code_bytes = 8192;
-
-	return 1;
-}
-__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 28c67aae5562..33ff10287a5d 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -17,19 +17,7 @@
 
 #include <asm/stacktrace.h>
 
-#define STACKSLOTS_PER_LINE 4
-#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
-
-int panic_on_unrecovered_nmi;
-int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
-static unsigned int code_bytes = 64;
-static int die_counter;
-
-void printk_address(unsigned long address, int reliable)
-{
-	printk(" [<%p>] %s%pS\n", (void *) address,
-			reliable ? "" : "? ", (void *) address);
-}
+#include "dumpstack.h"
 
 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
 					unsigned *usedp, char **idp)
@@ -113,51 +101,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
  * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
  */
 
-static inline int valid_stack_ptr(struct thread_info *tinfo,
-			void *p, unsigned int size, void *end)
-{
-	void *t = tinfo;
-	if (end) {
-		if (p < end && p >= (end-THREAD_SIZE))
-			return 1;
-		else
-			return 0;
-	}
-	return p > t && p < t + THREAD_SIZE - size;
-}
-
-/* The form of the top of the frame on the stack */
-struct stack_frame {
-	struct stack_frame *next_frame;
-	unsigned long return_address;
-};
-
-static inline unsigned long
-print_context_stack(struct thread_info *tinfo,
-		unsigned long *stack, unsigned long bp,
-		const struct stacktrace_ops *ops, void *data,
-		unsigned long *end)
-{
-	struct stack_frame *frame = (struct stack_frame *)bp;
-
-	while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
-		unsigned long addr;
-
-		addr = *stack;
-		if (__kernel_text_address(addr)) {
-			if ((unsigned long) stack == bp + sizeof(long)) {
-				ops->address(data, addr, 1);
-				frame = frame->next_frame;
-				bp = (unsigned long) frame;
-			} else {
-				ops->address(data, addr, bp == 0);
-			}
-		}
-		stack++;
-	}
-	return bp;
-}
-
 void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		unsigned long *stack, unsigned long bp,
 		const struct stacktrace_ops *ops, void *data)
@@ -248,57 +191,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 }
 EXPORT_SYMBOL(dump_trace);
 
-static void
-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
-	printk(data);
-	print_symbol(msg, symbol);
-	printk("\n");
-}
-
-static void print_trace_warning(void *data, char *msg)
-{
-	printk("%s%s\n", (char *)data, msg);
-}
-
-static int print_trace_stack(void *data, char *name)
-{
-	printk("%s <%s> ", (char *)data, name);
-	return 0;
-}
-
-/*
- * Print one address/symbol entries per line.
- */
-static void print_trace_address(void *data, unsigned long addr, int reliable)
-{
-	touch_nmi_watchdog();
-	printk(data);
-	printk_address(addr, reliable);
-}
-
-static const struct stacktrace_ops print_trace_ops = {
-	.warning = print_trace_warning,
-	.warning_symbol = print_trace_warning_symbol,
-	.stack = print_trace_stack,
-	.address = print_trace_address,
-};
-
-static void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp, char *log_lvl)
-{
-	printk("%sCall Trace:\n", log_lvl);
-	dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
-}
-
-void show_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp)
-{
-	show_trace_log_lvl(task, regs, stack, bp, "");
-}
-
-static void
+void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		unsigned long *sp, unsigned long bp, char *log_lvl)
 {
@@ -342,33 +235,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
 }
 
-void show_stack(struct task_struct *task, unsigned long *sp)
-{
-	show_stack_log_lvl(task, NULL, sp, 0, "");
-}
-
-/*
- * The architecture-independent dump_stack generator
- */
-void dump_stack(void)
-{
-	unsigned long bp = 0;
-	unsigned long stack;
-
-#ifdef CONFIG_FRAME_POINTER
-	if (!bp)
-		get_bp(bp);
-#endif
-
-	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
-		current->pid, current->comm, print_tainted(),
-		init_utsname()->release,
-		(int)strcspn(init_utsname()->version, " "),
-		init_utsname()->version);
-	show_trace(NULL, NULL, &stack, bp);
-}
-EXPORT_SYMBOL(dump_stack);
-
 void show_registers(struct pt_regs *regs)
 {
 	int i;
@@ -429,150 +295,3 @@ int is_valid_bugaddr(unsigned long ip)
 	return ud2 == 0x0b0f;
 }
 
-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
-static int die_owner = -1;
-static unsigned int die_nest_count;
-
-unsigned __kprobes long oops_begin(void)
-{
-	int cpu;
-	unsigned long flags;
-
-	oops_enter();
-
-	/* racy, but better than risking deadlock. */
-	raw_local_irq_save(flags);
-	cpu = smp_processor_id();
-	if (!__raw_spin_trylock(&die_lock)) {
-		if (cpu == die_owner)
-			/* nested oops. should stop eventually */;
-		else
-			__raw_spin_lock(&die_lock);
-	}
-	die_nest_count++;
-	die_owner = cpu;
-	console_verbose();
-	bust_spinlocks(1);
-	return flags;
-}
-
-void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
-{
-	if (regs && kexec_should_crash(current))
-		crash_kexec(regs);
-
-	bust_spinlocks(0);
-	die_owner = -1;
-	add_taint(TAINT_DIE);
-	die_nest_count--;
-	if (!die_nest_count)
-		/* Nest count reaches zero, release the lock. */
-		__raw_spin_unlock(&die_lock);
-	raw_local_irq_restore(flags);
-	oops_exit();
-
-	if (!signr)
-		return;
-	if (in_interrupt())
-		panic("Fatal exception in interrupt");
-	if (panic_on_oops)
-		panic("Fatal exception");
-	do_exit(signr);
-}
-
-int __kprobes __die(const char *str, struct pt_regs *regs, long err)
-{
-	printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
-#ifdef CONFIG_PREEMPT
-	printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
-	printk("SMP ");
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
-	printk("DEBUG_PAGEALLOC");
-#endif
-	printk("\n");
-	sysfs_printk_last_file();
-	if (notify_die(DIE_OOPS, str, regs, err,
-			current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
-		return 1;
-
-	show_registers(regs);
-	/* Executive summary in case the oops scrolled away */
-	printk(KERN_ALERT "RIP ");
-	printk_address(regs->ip, 1);
-	printk(" RSP <%016lx>\n", regs->sp);
-	return 0;
-}
-
-/*
- * This is gone through when something in the kernel has done something bad
- * and is about to be terminated:
- */
-void die(const char *str, struct pt_regs *regs, long err)
-{
-	unsigned long flags = oops_begin();
-	int sig = SIGSEGV;
-
-	if (!user_mode_vm(regs))
-		report_bug(regs->ip, regs);
-
-	if (__die(str, regs, err))
-		sig = 0;
-	oops_end(flags, regs, sig);
-}
-
-void notrace __kprobes
-die_nmi(char *str, struct pt_regs *regs, int do_panic)
-{
-	unsigned long flags;
-
-	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
-		return;
-
-	/*
-	 * We are in trouble anyway, lets at least try
-	 * to get a message out.
-	 */
-	flags = oops_begin();
-	printk(KERN_EMERG "%s", str);
-	printk(" on CPU%d, ip %08lx, registers:\n",
-		smp_processor_id(), regs->ip);
-	show_registers(regs);
-	oops_end(flags, regs, 0);
-	if (do_panic || panic_on_oops)
-		panic("Non maskable interrupt");
-	nmi_exit();
-	local_irq_enable();
-	do_exit(SIGBUS);
-}
-
-static int __init oops_setup(char *s)
-{
-	if (!s)
-		return -EINVAL;
-	if (!strcmp(s, "panic"))
-		panic_on_oops = 1;
-	return 0;
-}
-early_param("oops", oops_setup);
-
-static int __init kstack_setup(char *s)
-{
-	if (!s)
-		return -EINVAL;
-	kstack_depth_to_print = simple_strtoul(s, NULL, 0);
-	return 0;
-}
-early_param("kstack", kstack_setup);
-
-static int __init code_bytes_setup(char *s)
-{
-	code_bytes = simple_strtoul(s, NULL, 0);
-	if (code_bytes > 8192)
-		code_bytes = 8192;
-
-	return 1;
-}
-__setup("code_bytes=", code_bytes_setup);
-- 
cgit v1.2.3-70-g09d2


From 69a72a0e9337aad8c730e8e9942d5aa022bc4c5c Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Mon, 27 Oct 2008 07:51:20 -0700
Subject: x86/uv: update SCIR driver to use the idle_cpu() function

Impact: cleanup

Change UV heartbeat function to use idle_cpu to determine cpu's
"idleness".  Realign uv_hub definitions.

Signed-of-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/uv/uv_hub.h | 26 +++++++++++++-------------
 arch/x86/kernel/genx2apic_uv_x.c |  4 ++--
 2 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 400776dba9b5..0ee12928e9ee 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -128,19 +128,19 @@ struct uv_scir_s {
  * They are kept together in a struct to minimize cache misses.
  */
 struct uv_hub_info_s {
-	unsigned long	global_mmr_base;
-	unsigned long	gpa_mask;
-	unsigned long	gnode_upper;
-	unsigned long	lowmem_remap_top;
-	unsigned long	lowmem_remap_base;
-	unsigned short	pnode;
-	unsigned short	pnode_mask;
-	unsigned short	coherency_domain_number;
-	unsigned short	numa_blade_id;
-	unsigned char	blade_processor_id;
-	unsigned char	m_val;
-	unsigned char	n_val;
-	struct uv_scir_s scir;
+	unsigned long		global_mmr_base;
+	unsigned long		gpa_mask;
+	unsigned long		gnode_upper;
+	unsigned long		lowmem_remap_top;
+	unsigned long		lowmem_remap_base;
+	unsigned short		pnode;
+	unsigned short		pnode_mask;
+	unsigned short		coherency_domain_number;
+	unsigned short		numa_blade_id;
+	unsigned char		blade_processor_id;
+	unsigned char		m_val;
+	unsigned char		n_val;
+	struct uv_scir_s	scir;
 };
 
 DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 84367d84bb10..85fb7dd48f67 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -370,8 +370,8 @@ static void uv_heartbeat(unsigned long ignored)
 	/* flip heartbeat bit */
 	bits ^= SCIR_CPU_HEARTBEAT;
 
-	/* are we the idle thread? */
-	if (current->pid == 0)
+	/* is this cpu idle? */
+	if (idle_cpu(raw_smp_processor_id()))
 		bits &= ~SCIR_CPU_ACTIVITY;
 	else
 		bits |= SCIR_CPU_ACTIVITY;
-- 
cgit v1.2.3-70-g09d2


From 30604bb410b53efa9c93ee8f03d7aa7494094faa Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 14 Oct 2008 18:59:18 -0700
Subject: x86: break up mtrr_cleanup() into several small functions.

Ingo said mtrr_cleanup() is big and ugly.

so break it up into more functions and make it more readable.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/mtrr/main.c | 346 ++++++++++++++++++++--------------------
 1 file changed, 171 insertions(+), 175 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index c78c04821ea1..1159e269e596 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -803,6 +803,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
 }
 
 static struct res_range __initdata range[RANGE_NUM];
+static int __initdata nr_range;
 
 #ifdef CONFIG_MTRR_SANITIZER
 
@@ -1206,39 +1207,43 @@ struct mtrr_cleanup_result {
 #define PSHIFT		(PAGE_SHIFT - 10)
 
 static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
-static struct res_range __initdata range_new[RANGE_NUM];
 static unsigned long __initdata min_loss_pfn[RANGE_NUM];
 
-static int __init mtrr_cleanup(unsigned address_bits)
+static void __init print_out_mtrr_range_state(void)
 {
-	unsigned long extra_remove_base, extra_remove_size;
-	unsigned long base, size, def, dummy;
-	mtrr_type type;
-	int nr_range, nr_range_new;
-	u64 chunk_size, gran_size;
-	unsigned long range_sums, range_sums_new;
-	int index_good;
-	int num_reg_good;
 	int i;
+	char start_factor = 'K', size_factor = 'K';
+	unsigned long start_base, size_base;
+	mtrr_type type;
 
-	/* extra one for all 0 */
-	int num[MTRR_NUM_TYPES + 1];
+	for (i = 0; i < num_var_ranges; i++) {
 
-	if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
-		return 0;
-	rdmsr(MTRRdefType_MSR, def, dummy);
-	def &= 0xff;
-	if (def != MTRR_TYPE_UNCACHABLE)
-		return 0;
+		size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
+		if (!size_base)
+			continue;
 
-	/* get it and store it aside */
-	memset(range_state, 0, sizeof(range_state));
-	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
-		range_state[i].base_pfn = base;
-		range_state[i].size_pfn = size;
-		range_state[i].type = type;
+		size_base = to_size_factor(size_base, &size_factor),
+		start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
+		start_base = to_size_factor(start_base, &start_factor),
+		type = range_state[i].type;
+
+		printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
+			i, start_base, start_factor,
+			size_base, size_factor,
+			(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
+			    ((type == MTRR_TYPE_WRPROT) ? "WP" :
+			     ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
+			);
 	}
+}
+
+static int __init mtrr_need_cleanup(void)
+{
+	int i;
+	mtrr_type type;
+	unsigned long size;
+	/* extra one for all 0 */
+	int num[MTRR_NUM_TYPES + 1];
 
 	/* check entries number */
 	memset(num, 0, sizeof(num));
@@ -1263,29 +1268,133 @@ static int __init mtrr_cleanup(unsigned address_bits)
 		num_var_ranges - num[MTRR_NUM_TYPES])
 		return 0;
 
-	/* print original var MTRRs at first, for debugging: */
-	printk(KERN_DEBUG "original variable MTRRs\n");
-	for (i = 0; i < num_var_ranges; i++) {
-		char start_factor = 'K', size_factor = 'K';
-		unsigned long start_base, size_base;
+	return 1;
+}
 
-		size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
-		if (!size_base)
-			continue;
+static unsigned long __initdata range_sums;
+static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
+					 unsigned long extra_remove_base,
+					 unsigned long extra_remove_size,
+					 int i)
+{
+	int num_reg;
+	static struct res_range range_new[RANGE_NUM];
+	static int nr_range_new;
+	unsigned long range_sums_new;
+
+	/* convert ranges to var ranges state */
+	num_reg = x86_setup_var_mtrrs(range, nr_range,
+						chunk_size, gran_size);
+
+	/* we got new setting in range_state, check it */
+	memset(range_new, 0, sizeof(range_new));
+	nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+				extra_remove_base, extra_remove_size);
+	range_sums_new = sum_ranges(range_new, nr_range_new);
+
+	result[i].chunk_sizek = chunk_size >> 10;
+	result[i].gran_sizek = gran_size >> 10;
+	result[i].num_reg = num_reg;
+	if (range_sums < range_sums_new) {
+		result[i].lose_cover_sizek =
+			(range_sums_new - range_sums) << PSHIFT;
+		result[i].bad = 1;
+	} else
+		result[i].lose_cover_sizek =
+			(range_sums - range_sums_new) << PSHIFT;
 
-		size_base = to_size_factor(size_base, &size_factor),
-		start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
-		start_base = to_size_factor(start_base, &start_factor),
-		type = range_state[i].type;
+	/* double check it */
+	if (!result[i].bad && !result[i].lose_cover_sizek) {
+		if (nr_range_new != nr_range ||
+			memcmp(range, range_new, sizeof(range)))
+				result[i].bad = 1;
+	}
 
-		printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
-			i, start_base, start_factor,
-			size_base, size_factor,
-			(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
-			    ((type == MTRR_TYPE_WRPROT) ? "WP" :
-			     ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
-			);
+	if (!result[i].bad && (range_sums - range_sums_new <
+				min_loss_pfn[num_reg])) {
+		min_loss_pfn[num_reg] =
+			range_sums - range_sums_new;
 	}
+}
+
+static void __init mtrr_print_out_one_result(int i)
+{
+	char gran_factor, chunk_factor, lose_factor;
+	unsigned long gran_base, chunk_base, lose_base;
+
+	gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+	chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+	lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+	printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
+			result[i].bad ? "*BAD*" : " ",
+			gran_base, gran_factor, chunk_base, chunk_factor);
+	printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ld%c\n",
+			result[i].num_reg, result[i].bad ? "-" : "",
+			lose_base, lose_factor);
+}
+
+static int __init mtrr_search_optimal_index(void)
+{
+	int i;
+	int num_reg_good;
+	int index_good;
+
+	if (nr_mtrr_spare_reg >= num_var_ranges)
+		nr_mtrr_spare_reg = num_var_ranges - 1;
+	num_reg_good = -1;
+	for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
+		if (!min_loss_pfn[i])
+			num_reg_good = i;
+	}
+
+	index_good = -1;
+	if (num_reg_good != -1) {
+		for (i = 0; i < NUM_RESULT; i++) {
+			if (!result[i].bad &&
+			    result[i].num_reg == num_reg_good &&
+			    !result[i].lose_cover_sizek) {
+				index_good = i;
+				break;
+			}
+		}
+	}
+
+	return index_good;
+}
+
+
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+	unsigned long extra_remove_base, extra_remove_size;
+	unsigned long base, size, def, dummy;
+	mtrr_type type;
+	u64 chunk_size, gran_size;
+	int index_good;
+	int i;
+
+	if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
+		return 0;
+	rdmsr(MTRRdefType_MSR, def, dummy);
+	def &= 0xff;
+	if (def != MTRR_TYPE_UNCACHABLE)
+		return 0;
+
+	/* get it and store it aside */
+	memset(range_state, 0, sizeof(range_state));
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		range_state[i].base_pfn = base;
+		range_state[i].size_pfn = size;
+		range_state[i].type = type;
+	}
+
+	/* check if we need handle it and can handle it */
+	if (!mtrr_need_cleanup())
+		return 0;
+
+	/* print original var MTRRs at first, for debugging: */
+	printk(KERN_DEBUG "original variable MTRRs\n");
+	print_out_mtrr_range_state();
 
 	memset(range, 0, sizeof(range));
 	extra_remove_size = 0;
@@ -1309,176 +1418,64 @@ static int __init mtrr_cleanup(unsigned address_bits)
 	       range_sums >> (20 - PAGE_SHIFT));
 
 	if (mtrr_chunk_size && mtrr_gran_size) {
-		int num_reg;
-		char gran_factor, chunk_factor, lose_factor;
-		unsigned long gran_base, chunk_base, lose_base;
-
-		debug_print++;
-		/* convert ranges to var ranges state */
-		num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
-					      mtrr_gran_size);
+		i = 0;
+		mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
+				      extra_remove_base, extra_remove_size, i);
 
-		/* we got new setting in range_state, check it */
-		memset(range_new, 0, sizeof(range_new));
-		nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
-						      extra_remove_base,
-						      extra_remove_size);
-		range_sums_new = sum_ranges(range_new, nr_range_new);
+		mtrr_print_out_one_result(i);
 
-		i = 0;
-		result[i].chunk_sizek = mtrr_chunk_size >> 10;
-		result[i].gran_sizek = mtrr_gran_size >> 10;
-		result[i].num_reg = num_reg;
-		if (range_sums < range_sums_new) {
-			result[i].lose_cover_sizek =
-				(range_sums_new - range_sums) << PSHIFT;
-			result[i].bad = 1;
-		} else
-			result[i].lose_cover_sizek =
-				(range_sums - range_sums_new) << PSHIFT;
-
-		gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
-		chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
-		lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
-		printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
-			 result[i].bad?"*BAD*":" ",
-			 gran_base, gran_factor, chunk_base, chunk_factor);
-		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ld%c\n",
-			 result[i].num_reg, result[i].bad?"-":"",
-			 lose_base, lose_factor);
 		if (!result[i].bad) {
 			set_var_mtrr_all(address_bits);
 			return 1;
 		}
 		printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
 		       "will find optimal one\n");
-		debug_print--;
-		memset(result, 0, sizeof(result[0]));
 	}
 
 	i = 0;
 	memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
 	memset(result, 0, sizeof(result));
 	for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
-		char gran_factor;
-		unsigned long gran_base;
-
-		if (debug_print)
-			gran_base = to_size_factor(gran_size >> 10, &gran_factor);
 
 		for (chunk_size = gran_size; chunk_size < (1ULL<<32);
 		     chunk_size <<= 1) {
-			int num_reg;
 
-			if (debug_print) {
-				char chunk_factor;
-				unsigned long chunk_base;
-
-				chunk_base = to_size_factor(chunk_size>>10, &chunk_factor),
-				printk(KERN_INFO "\n");
-				printk(KERN_INFO "gran_size: %ld%c   chunk_size: %ld%c \n",
-				       gran_base, gran_factor, chunk_base, chunk_factor);
-			}
 			if (i >= NUM_RESULT)
 				continue;
 
-			/* convert ranges to var ranges state */
-			num_reg = x86_setup_var_mtrrs(range, nr_range,
-							 chunk_size, gran_size);
-
-			/* we got new setting in range_state, check it */
-			memset(range_new, 0, sizeof(range_new));
-			nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
-					 extra_remove_base, extra_remove_size);
-			range_sums_new = sum_ranges(range_new, nr_range_new);
-
-			result[i].chunk_sizek = chunk_size >> 10;
-			result[i].gran_sizek = gran_size >> 10;
-			result[i].num_reg = num_reg;
-			if (range_sums < range_sums_new) {
-				result[i].lose_cover_sizek =
-					(range_sums_new - range_sums) << PSHIFT;
-				result[i].bad = 1;
-			} else
-				result[i].lose_cover_sizek =
-					(range_sums - range_sums_new) << PSHIFT;
-
-			/* double check it */
-			if (!result[i].bad && !result[i].lose_cover_sizek) {
-				if (nr_range_new != nr_range ||
-					memcmp(range, range_new, sizeof(range)))
-						result[i].bad = 1;
+			mtrr_calc_range_state(chunk_size, gran_size,
+				      extra_remove_base, extra_remove_size, i);
+			if (debug_print) {
+				mtrr_print_out_one_result(i);
+				printk(KERN_INFO "\n");
 			}
 
-			if (!result[i].bad && (range_sums - range_sums_new <
-					       min_loss_pfn[num_reg])) {
-				min_loss_pfn[num_reg] =
-					range_sums - range_sums_new;
-			}
 			i++;
 		}
 	}
 
-	/* print out all */
-	for (i = 0; i < NUM_RESULT; i++) {
-		char gran_factor, chunk_factor, lose_factor;
-		unsigned long gran_base, chunk_base, lose_base;
-
-		gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
-		chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
-		lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
-		printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
-			 result[i].bad?"*BAD*":" ",
-			 gran_base, gran_factor, chunk_base, chunk_factor);
-		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ld%c\n",
-			 result[i].num_reg, result[i].bad?"-":"",
-			 lose_base, lose_factor);
-	}
-
 	/* try to find the optimal index */
-	if (nr_mtrr_spare_reg >= num_var_ranges)
-		nr_mtrr_spare_reg = num_var_ranges - 1;
-	num_reg_good = -1;
-	for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
-		if (!min_loss_pfn[i])
-			num_reg_good = i;
-	}
-
-	index_good = -1;
-	if (num_reg_good != -1) {
-		for (i = 0; i < NUM_RESULT; i++) {
-			if (!result[i].bad &&
-			    result[i].num_reg == num_reg_good &&
-			    !result[i].lose_cover_sizek) {
-				index_good = i;
-				break;
-			}
-		}
-	}
+	index_good = mtrr_search_optimal_index();
 
 	if (index_good != -1) {
-		char gran_factor, chunk_factor, lose_factor;
-		unsigned long gran_base, chunk_base, lose_base;
-
 		printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
 		i = index_good;
-		gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
-		chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
-		lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
-		printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t",
-			 gran_base, gran_factor, chunk_base, chunk_factor);
-		printk(KERN_CONT "num_reg: %d  \tlose RAM: %ld%c\n",
-			 result[i].num_reg, lose_base, lose_factor);
+		mtrr_print_out_one_result(i);
+
 		/* convert ranges to var ranges state */
 		chunk_size = result[i].chunk_sizek;
 		chunk_size <<= 10;
 		gran_size = result[i].gran_sizek;
 		gran_size <<= 10;
-		debug_print++;
 		x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
-		debug_print--;
 		set_var_mtrr_all(address_bits);
+		printk(KERN_DEBUG "New variable MTRRs\n");
+		print_out_mtrr_range_state();
 		return 1;
+	} else {
+		/* print out all */
+		for (i = 0; i < NUM_RESULT; i++)
+			mtrr_print_out_one_result(i);
 	}
 
 	printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
@@ -1562,7 +1559,6 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 {
 	unsigned long i, base, size, highest_pfn = 0, def, dummy;
 	mtrr_type type;
-	int nr_range;
 	u64 total_trim_size;
 
 	/* extra one for all 0 */
-- 
cgit v1.2.3-70-g09d2


From d4f1b10365d4f03dd802433e0014cf503e6e930c Mon Sep 17 00:00:00 2001
From: Jike Song <albcamus@gmail.com>
Date: Fri, 17 Oct 2008 13:25:07 +0800
Subject: x86: clean up comments wrt. rd{msr|tsc|pmc}

The rdmsr instruction(et al) for i386 and x86-64 are semantically same.
The only difference is how gcc interpret constraint "A" for these targets.

Signed-off-by: Jike Song <albcamus@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/msr.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 46be2fa7ac26..478a9245aae1 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -22,10 +22,10 @@ static inline unsigned long long native_read_tscp(unsigned int *aux)
 }
 
 /*
- * i386 calling convention returns 64-bit value in edx:eax, while
- * x86_64 returns at rax. Also, the "A" constraint does not really
- * mean rdx:rax in x86_64, so we need specialized behaviour for each
- * architecture
+ * both i386 and x86_64 returns 64-bit value in edx:eax, but gcc's "A"
+ * constraint has different meanings. For i386, "A" means exactly
+ * edx:eax, while for x86_64 it doesn't mean rdx:rax or edx:eax. Instead,
+ * it means rax *or* rdx.
  */
 #ifdef CONFIG_X86_64
 #define DECLARE_ARGS(val, low, high)	unsigned low, high
-- 
cgit v1.2.3-70-g09d2


From ad38dab01323a01e825555fc46863b73cd0efdc7 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Mon, 27 Oct 2008 13:30:56 -0700
Subject: x86: use the new byteorder headers

Impact: cleanup, no functionality changed

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/byteorder.h | 74 ++++++++++++++++------------------------
 1 file changed, 29 insertions(+), 45 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h
index e02ae2d89acf..f110ad417df3 100644
--- a/arch/x86/include/asm/byteorder.h
+++ b/arch/x86/include/asm/byteorder.h
@@ -4,26 +4,33 @@
 #include <asm/types.h>
 #include <linux/compiler.h>
 
-#ifdef __GNUC__
+#define __LITTLE_ENDIAN
 
-#ifdef __i386__
-
-static inline __attribute_const__ __u32 ___arch__swab32(__u32 x)
+static inline __attribute_const__ __u32 __arch_swab32(__u32 val)
 {
-#ifdef CONFIG_X86_BSWAP
-	asm("bswap %0" : "=r" (x) : "0" (x));
-#else
+#ifdef __i386__
+# ifdef CONFIG_X86_BSWAP
+	asm("bswap %0" : "=r" (val) : "0" (val));
+# else
 	asm("xchgb %b0,%h0\n\t"	/* swap lower bytes	*/
 	    "rorl $16,%0\n\t"	/* swap words		*/
 	    "xchgb %b0,%h0"	/* swap higher bytes	*/
-	    : "=q" (x)
-	    : "0" (x));
+	    : "=q" (val)
+	    : "0" (val));
+# endif
+
+#else /* __i386__ */
+	asm("bswapl %0"
+	    : "=r" (val)
+	    : "0" (val));
 #endif
-	return x;
+	return val;
 }
+#define __arch_swab32 __arch_swab32
 
-static inline __attribute_const__ __u64 ___arch__swab64(__u64 val)
+static inline __attribute_const__ __u64 __arch_swab64(__u64 val)
 {
+#ifdef __i386__
 	union {
 		struct {
 			__u32 a;
@@ -32,50 +39,27 @@ static inline __attribute_const__ __u64 ___arch__swab64(__u64 val)
 		__u64 u;
 	} v;
 	v.u = val;
-#ifdef CONFIG_X86_BSWAP
+# ifdef CONFIG_X86_BSWAP
 	asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1"
 	    : "=r" (v.s.a), "=r" (v.s.b)
 	    : "0" (v.s.a), "1" (v.s.b));
-#else
-	v.s.a = ___arch__swab32(v.s.a);
-	v.s.b = ___arch__swab32(v.s.b);
+# else
+	v.s.a = __arch_swab32(v.s.a);
+	v.s.b = __arch_swab32(v.s.b);
 	asm("xchgl %0,%1"
 	    : "=r" (v.s.a), "=r" (v.s.b)
 	    : "0" (v.s.a), "1" (v.s.b));
-#endif
+# endif
 	return v.u;
-}
-
 #else /* __i386__ */
-
-static inline __attribute_const__ __u64 ___arch__swab64(__u64 x)
-{
 	asm("bswapq %0"
-	    : "=r" (x)
-	    : "0" (x));
-	return x;
-}
-
-static inline __attribute_const__ __u32 ___arch__swab32(__u32 x)
-{
-	asm("bswapl %0"
-	    : "=r" (x)
-	    : "0" (x));
-	return x;
-}
-
+	    : "=r" (val)
+	    : "0" (val));
+	return val;
 #endif
+}
+#define __arch_swab64 __arch_swab64
 
-/* Do not define swab16.  Gcc is smart enough to recognize "C" version and
-   convert it into rotation or exhange.  */
-
-#define __arch__swab64(x) ___arch__swab64(x)
-#define __arch__swab32(x) ___arch__swab32(x)
-
-#define __BYTEORDER_HAS_U64__
-
-#endif /* __GNUC__ */
-
-#include <linux/byteorder/little_endian.h>
+#include <linux/byteorder.h>
 
 #endif /* _ASM_X86_BYTEORDER_H */
-- 
cgit v1.2.3-70-g09d2


From 96bf84b71255b0ee4fcee91e9acd1b5e73030eaf Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 29 Oct 2008 18:44:08 -0700
Subject: x86: signal: cosmetic unification of signr_convert()

Impact: cleanup

Make signr_convert() same.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 2 ++
 arch/x86/kernel/signal_64.c | 6 ++++++
 2 files changed, 8 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 85a0d37cdae9..abf0df700fd0 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -503,10 +503,12 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
  */
 static int signr_convert(int sig)
 {
+#ifdef CONFIG_X86_32
 	struct thread_info *info = current_thread_info();
 
 	if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32)
 		return info->exec_domain->signal_invmap[sig];
+#endif /* CONFIG_X86_32 */
 	return sig;
 }
 
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 3d0deb336745..a4b46e6392b1 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -295,6 +295,12 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
  */
 static int signr_convert(int sig)
 {
+#ifdef CONFIG_X86_32
+	struct thread_info *info = current_thread_info();
+
+	if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32)
+		return info->exec_domain->signal_invmap[sig];
+#endif /* CONFIG_X86_32 */
 	return sig;
 }
 
-- 
cgit v1.2.3-70-g09d2


From cabf503588961d202a33b3fd872767e9f6abbef7 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 29 Oct 2008 18:46:07 -0700
Subject: x86: signal: cosmetic unification of macros for setup_rt_frame()

Impact: cleanup

Add #ifdef directive for unification.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 12 ++++++++++++
 arch/x86/kernel/signal_64.c | 14 ++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index abf0df700fd0..6f3b9a9cc123 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -512,10 +512,22 @@ static int signr_convert(int sig)
 	return sig;
 }
 
+#ifdef CONFIG_X86_32
+
 #define is_ia32	1
 #define ia32_setup_frame	__setup_frame
 #define ia32_setup_rt_frame	__setup_rt_frame
 
+#else /* !CONFIG_X86_32 */
+
+#ifdef CONFIG_IA32_EMULATION
+#define is_ia32	test_thread_flag(TIF_IA32)
+#else /* !CONFIG_IA32_EMULATION */
+#define is_ia32	0
+#endif /* CONFIG_IA32_EMULATION */
+
+#endif /* CONFIG_X86_32 */
+
 static int
 setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	       sigset_t *set, struct pt_regs *regs)
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index a4b46e6392b1..49df79e05111 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -304,11 +304,21 @@ static int signr_convert(int sig)
 	return sig;
 }
 
+#ifdef CONFIG_X86_32
+
+#define is_ia32	1
+#define ia32_setup_frame	__setup_frame
+#define ia32_setup_rt_frame	__setup_rt_frame
+
+#else /* !CONFIG_X86_32 */
+
 #ifdef CONFIG_IA32_EMULATION
 #define is_ia32	test_thread_flag(TIF_IA32)
-#else
+#else /* !CONFIG_IA32_EMULATION */
 #define is_ia32	0
-#endif
+#endif /* CONFIG_IA32_EMULATION */
+
+#endif /* CONFIG_X86_32 */
 
 static int
 setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-- 
cgit v1.2.3-70-g09d2


From 57917752f51bcead3bb6c83d74137fbe342504ec Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 29 Oct 2008 18:46:40 -0700
Subject: x86: signal: cosmetic unification of NR_restart_syscall

Impact: cleanup

Add #ifdef directive to unify NR_restart_syscall.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 6 ++++++
 arch/x86/kernel/signal_64.c | 5 +++++
 2 files changed, 11 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 6f3b9a9cc123..a0efc1b3c4c9 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -628,7 +628,13 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	return 0;
 }
 
+#ifdef CONFIG_X86_32
 #define NR_restart_syscall	__NR_restart_syscall
+#else /* !CONFIG_X86_32 */
+#define NR_restart_syscall	\
+	test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
+#endif /* CONFIG_X86_32 */
+
 /*
  * Note that 'init' is a special process: it doesn't get signals it doesn't
  * want to handle. Thus you cannot kill init even with a SIGKILL even by
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 49df79e05111..83990db82f74 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -420,8 +420,13 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	return 0;
 }
 
+#ifdef CONFIG_X86_32
+#define NR_restart_syscall	__NR_restart_syscall
+#else /* !CONFIG_X86_32 */
 #define NR_restart_syscall	\
 	test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
+#endif /* CONFIG_X86_32 */
+
 /*
  * Note that 'init' is a special process: it doesn't get signals it doesn't
  * want to handle. Thus you cannot kill init even with a SIGKILL even by
-- 
cgit v1.2.3-70-g09d2


From 7a5276889cfa96619bf863c87581005f46139986 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Thu, 30 Oct 2008 10:38:24 +0000
Subject: x86: simplify X86_MPPARSE config option

Impact: cleanup

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 350bee1d54dc..f843de13e242 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -235,21 +235,13 @@ config X86_FIND_SMP_CONFIG
 	def_bool y
 	depends on X86_MPPARSE || X86_VOYAGER
 
-if ACPI
 config X86_MPPARSE
-	def_bool y
-	bool "Enable MPS table"
+	bool "Enable MPS table" if ACPI
+	default y
 	depends on X86_LOCAL_APIC
 	help
 	  For old smp systems that do not have proper acpi support. Newer systems
 	  (esp with 64bit cpus) with acpi support, MADT and DSDT will override it
-endif
-
-if !ACPI
-config X86_MPPARSE
-	def_bool y
-	depends on X86_LOCAL_APIC
-endif
 
 choice
 	prompt "Subarchitecture Type"
-- 
cgit v1.2.3-70-g09d2


From b062f841b569791d3054e975cd85f48562161565 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Thu, 30 Oct 2008 19:16:46 +0300
Subject: x86: nmi - add sensible names to nmi_watchdog boot param

Impact: introduce nmi_watchdog=lapic and nmi_watchdog=ioapic aliases

Add sensible names as "lapic" and "ioapic" to
nmi_watchdog boot parameter. Sometimes it is not
that easy to recall what exactly nmi_watchdog=1
does mean so we allow the using of symbolic names here.

Old numeric values remain valid.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/nmi.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 2c97f07f1c2c..c4869e4532a3 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -199,12 +199,17 @@ static int __init setup_nmi_watchdog(char *str)
 		++str;
 	}
 
-	get_option(&str, &nmi);
-
-	if (nmi >= NMI_INVALID)
-		return 0;
+	if (!strncmp(str, "lapic", 5))
+		nmi_watchdog = NMI_LOCAL_APIC;
+	else if (!strncmp(str, "ioapic", 6))
+		nmi_watchdog = NMI_IO_APIC;
+	else {
+		get_option(&str, &nmi);
+		if (nmi >= NMI_INVALID)
+			return 0;
+		nmi_watchdog = nmi;
+	}
 
-	nmi_watchdog = nmi;
 	return 1;
 }
 __setup("nmi_watchdog=", setup_nmi_watchdog);
-- 
cgit v1.2.3-70-g09d2


From 1cbd8b3fdcf56a3c39a7596512095c9e33221fa1 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Thu, 30 Oct 2008 10:45:36 +0000
Subject: x86: add two missing unwind annotations

Impact: improve debuginfo

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b86f332c96a6..ddeeb1052583 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -255,6 +255,7 @@ ENTRY(ret_from_fork)
 	call schedule_tail
 	GET_THREAD_INFO(%rcx)
 	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
+	CFI_REMEMBER_STATE
 	jnz rff_trace
 rff_action:	
 	RESTORE_REST
@@ -264,6 +265,7 @@ rff_action:
 	jnz  int_ret_from_sys_call
 	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
 	jmp ret_from_sys_call
+	CFI_RESTORE_STATE
 rff_trace:
 	movq %rsp,%rdi
 	call syscall_trace_leave
-- 
cgit v1.2.3-70-g09d2


From 31498a01496ffca3b542bae72b8ec499cd9302db Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Fri, 31 Oct 2008 09:48:02 +0800
Subject: kexec/i386: remove PAGE_SIZE alignment from relocate_kernel

Impact: save kernel .text by loosening kexec page alignment

This patch removes PAGE_SIZE alignment from relocate_kernel(). Before
kexec jump patches are merged, control page is mapped to
relocate_kernel in kexec page tables, so relocate_kernel must be
PAGE_SIZE aligned. Now, control page is mapped to identity mapped
address, so relocate_kernel need not to be PAGE_SIZE aligned any
more. This can reduce a few KB from kernel text segement.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/relocate_kernel_32.S | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 6f50664b2ba5..377da3f78e8c 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -39,7 +39,6 @@
 #define CP_PA_BACKUP_PAGES_MAP	DATA(0x1c)
 
 	.text
-	.align PAGE_SIZE
 	.globl relocate_kernel
 relocate_kernel:
 	/* Save the CPU context, used for jumping back */
-- 
cgit v1.2.3-70-g09d2


From 92be3d6bdf2cb34972ab50e12ad4da1076e690da Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Fri, 31 Oct 2008 09:48:08 +0800
Subject: kexec/i386: allocate page table pages dynamically

Impact: save .text size when kexec is built in but not loaded

This patch adds an architecture specific struct kimage_arch into
struct kimage. The pointers to page table pages used by kexec are
added to struct kimage_arch. The page tables pages are dynamically
allocated in machine_kexec_prepare instead of statically from BSS
segment. This will save up to 20k memory when kexec image is not
loaded.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/kexec.h       | 14 ++++++++
 arch/x86/kernel/machine_kexec_32.c | 67 ++++++++++++++++++++++++++------------
 include/linux/kexec.h              |  4 +++
 3 files changed, 64 insertions(+), 21 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index a1f22771a15a..df9c41a9c6ae 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -170,6 +170,20 @@ relocate_kernel(unsigned long indirection_page,
 		unsigned long start_address) ATTRIB_NORET;
 #endif
 
+#ifdef CONFIG_X86_32
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+	pgd_t *pgd;
+#ifdef CONFIG_X86_PAE
+	pmd_t *pmd0;
+	pmd_t *pmd1;
+#endif
+	pte_t *pte0;
+	pte_t *pte1;
+};
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_X86_KEXEC_H */
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 7a385746509a..1100312847a5 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -13,6 +13,7 @@
 #include <linux/numa.h>
 #include <linux/ftrace.h>
 #include <linux/suspend.h>
+#include <linux/gfp.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -25,15 +26,6 @@
 #include <asm/system.h>
 #include <asm/cacheflush.h>
 
-#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
-static u32 kexec_pgd[1024] PAGE_ALIGNED;
-#ifdef CONFIG_X86_PAE
-static u32 kexec_pmd0[1024] PAGE_ALIGNED;
-static u32 kexec_pmd1[1024] PAGE_ALIGNED;
-#endif
-static u32 kexec_pte0[1024] PAGE_ALIGNED;
-static u32 kexec_pte1[1024] PAGE_ALIGNED;
-
 static void set_idt(void *newidt, __u16 limit)
 {
 	struct desc_ptr curidt;
@@ -76,6 +68,37 @@ static void load_segments(void)
 #undef __STR
 }
 
+static void machine_kexec_free_page_tables(struct kimage *image)
+{
+	free_page((unsigned long)image->arch.pgd);
+#ifdef CONFIG_X86_PAE
+	free_page((unsigned long)image->arch.pmd0);
+	free_page((unsigned long)image->arch.pmd1);
+#endif
+	free_page((unsigned long)image->arch.pte0);
+	free_page((unsigned long)image->arch.pte1);
+}
+
+static int machine_kexec_alloc_page_tables(struct kimage *image)
+{
+	image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
+#ifdef CONFIG_X86_PAE
+	image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
+	image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
+#endif
+	image->arch.pte0 = (pte_t *)get_zeroed_page(GFP_KERNEL);
+	image->arch.pte1 = (pte_t *)get_zeroed_page(GFP_KERNEL);
+	if (!image->arch.pgd ||
+#ifdef CONFIG_X86_PAE
+	    !image->arch.pmd0 || !image->arch.pmd1 ||
+#endif
+	    !image->arch.pte0 || !image->arch.pte1) {
+		machine_kexec_free_page_tables(image);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
 /*
  * A architecture hook called to validate the
  * proposed image and prepare the control pages
@@ -87,13 +110,14 @@ static void load_segments(void)
  * reboot code buffer to allow us to avoid allocations
  * later.
  *
- * Make control page executable.
+ * - Make control page executable.
+ * - Allocate page tables
  */
 int machine_kexec_prepare(struct kimage *image)
 {
 	if (nx_enabled)
 		set_pages_x(image->control_code_page, 1);
-	return 0;
+	return machine_kexec_alloc_page_tables(image);
 }
 
 /*
@@ -104,6 +128,7 @@ void machine_kexec_cleanup(struct kimage *image)
 {
 	if (nx_enabled)
 		set_pages_nx(image->control_code_page, 1);
+	machine_kexec_free_page_tables(image);
 }
 
 /*
@@ -150,18 +175,18 @@ void machine_kexec(struct kimage *image)
 	relocate_kernel_ptr = control_page;
 	page_list[PA_CONTROL_PAGE] = __pa(control_page);
 	page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
-	page_list[PA_PGD] = __pa(kexec_pgd);
-	page_list[VA_PGD] = (unsigned long)kexec_pgd;
+	page_list[PA_PGD] = __pa(image->arch.pgd);
+	page_list[VA_PGD] = (unsigned long)image->arch.pgd;
 #ifdef CONFIG_X86_PAE
-	page_list[PA_PMD_0] = __pa(kexec_pmd0);
-	page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
-	page_list[PA_PMD_1] = __pa(kexec_pmd1);
-	page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
+	page_list[PA_PMD_0] = __pa(image->arch.pmd0);
+	page_list[VA_PMD_0] = (unsigned long)image->arch.pmd0;
+	page_list[PA_PMD_1] = __pa(image->arch.pmd1);
+	page_list[VA_PMD_1] = (unsigned long)image->arch.pmd1;
 #endif
-	page_list[PA_PTE_0] = __pa(kexec_pte0);
-	page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
-	page_list[PA_PTE_1] = __pa(kexec_pte1);
-	page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+	page_list[PA_PTE_0] = __pa(image->arch.pte0);
+	page_list[VA_PTE_0] = (unsigned long)image->arch.pte0;
+	page_list[PA_PTE_1] = __pa(image->arch.pte1);
+	page_list[VA_PTE_1] = (unsigned long)image->arch.pte1;
 
 	if (image->type == KEXEC_TYPE_DEFAULT)
 		page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 17f76fc05173..adc34f2c6eff 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -100,6 +100,10 @@ struct kimage {
 #define KEXEC_TYPE_DEFAULT 0
 #define KEXEC_TYPE_CRASH   1
 	unsigned int preserve_context : 1;
+
+#ifdef ARCH_HAS_KIMAGE_ARCH
+	struct kimage_arch arch;
+#endif
 };
 
 
-- 
cgit v1.2.3-70-g09d2


From 9868ee63b896ee4d2ceb8c292e88d7f4e66caaf9 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Fri, 31 Oct 2008 09:48:15 +0800
Subject: kexec/i386: setup kexec page table in C

Impact: change the kexec bootstrap code implementation from assembly to C

This patch transforms the kexec page tables setup code from assembler
code to C code in machine_kexec_prepare. This improves readability and
reduces code line number.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/kexec.h         |  17 +-----
 arch/x86/kernel/machine_kexec_32.c   |  59 ++++++++++++++----
 arch/x86/kernel/relocate_kernel_32.S | 114 -----------------------------------
 3 files changed, 49 insertions(+), 141 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index df9c41a9c6ae..c61d8b2ab8b9 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -5,21 +5,8 @@
 # define PA_CONTROL_PAGE	0
 # define VA_CONTROL_PAGE	1
 # define PA_PGD			2
-# define VA_PGD			3
-# define PA_PTE_0		4
-# define VA_PTE_0		5
-# define PA_PTE_1		6
-# define VA_PTE_1		7
-# define PA_SWAP_PAGE		8
-# ifdef CONFIG_X86_PAE
-#  define PA_PMD_0		9
-#  define VA_PMD_0		10
-#  define PA_PMD_1		11
-#  define VA_PMD_1		12
-#  define PAGES_NR		13
-# else
-#  define PAGES_NR		9
-# endif
+# define PA_SWAP_PAGE		3
+# define PAGES_NR		4
 #else
 # define PA_CONTROL_PAGE	0
 # define VA_CONTROL_PAGE	1
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 1100312847a5..37f420018a41 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -99,6 +99,45 @@ static int machine_kexec_alloc_page_tables(struct kimage *image)
 	return 0;
 }
 
+static void machine_kexec_page_table_set_one(
+	pgd_t *pgd, pmd_t *pmd, pte_t *pte,
+	unsigned long vaddr, unsigned long paddr)
+{
+	pud_t *pud;
+
+	pgd += pgd_index(vaddr);
+#ifdef CONFIG_X86_PAE
+	if (!(pgd_val(*pgd) & _PAGE_PRESENT))
+		set_pgd(pgd, __pgd(__pa(pmd) | _PAGE_PRESENT));
+#endif
+	pud = pud_offset(pgd, vaddr);
+	pmd = pmd_offset(pud, vaddr);
+	if (!(pmd_val(*pmd) & _PAGE_PRESENT))
+		set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
+	pte = pte_offset_kernel(pmd, vaddr);
+	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
+}
+
+static void machine_kexec_prepare_page_tables(struct kimage *image)
+{
+	void *control_page;
+	pmd_t *pmd = 0;
+
+	control_page = page_address(image->control_code_page);
+#ifdef CONFIG_X86_PAE
+	pmd = image->arch.pmd0;
+#endif
+	machine_kexec_page_table_set_one(
+		image->arch.pgd, pmd, image->arch.pte0,
+		(unsigned long)control_page, __pa(control_page));
+#ifdef CONFIG_X86_PAE
+	pmd = image->arch.pmd1;
+#endif
+	machine_kexec_page_table_set_one(
+		image->arch.pgd, pmd, image->arch.pte1,
+		__pa(control_page), __pa(control_page));
+}
+
 /*
  * A architecture hook called to validate the
  * proposed image and prepare the control pages
@@ -112,12 +151,19 @@ static int machine_kexec_alloc_page_tables(struct kimage *image)
  *
  * - Make control page executable.
  * - Allocate page tables
+ * - Setup page tables
  */
 int machine_kexec_prepare(struct kimage *image)
 {
+	int error;
+
 	if (nx_enabled)
 		set_pages_x(image->control_code_page, 1);
-	return machine_kexec_alloc_page_tables(image);
+	error = machine_kexec_alloc_page_tables(image);
+	if (error)
+		return error;
+	machine_kexec_prepare_page_tables(image);
+	return 0;
 }
 
 /*
@@ -176,17 +222,6 @@ void machine_kexec(struct kimage *image)
 	page_list[PA_CONTROL_PAGE] = __pa(control_page);
 	page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
 	page_list[PA_PGD] = __pa(image->arch.pgd);
-	page_list[VA_PGD] = (unsigned long)image->arch.pgd;
-#ifdef CONFIG_X86_PAE
-	page_list[PA_PMD_0] = __pa(image->arch.pmd0);
-	page_list[VA_PMD_0] = (unsigned long)image->arch.pmd0;
-	page_list[PA_PMD_1] = __pa(image->arch.pmd1);
-	page_list[VA_PMD_1] = (unsigned long)image->arch.pmd1;
-#endif
-	page_list[PA_PTE_0] = __pa(image->arch.pte0);
-	page_list[VA_PTE_0] = (unsigned long)image->arch.pte0;
-	page_list[PA_PTE_1] = __pa(image->arch.pte1);
-	page_list[VA_PTE_1] = (unsigned long)image->arch.pte1;
 
 	if (image->type == KEXEC_TYPE_DEFAULT)
 		page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 377da3f78e8c..a160f3119725 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -10,15 +10,12 @@
 #include <asm/page.h>
 #include <asm/kexec.h>
 #include <asm/processor-flags.h>
-#include <asm/pgtable.h>
 
 /*
  * Must be relocatable PIC code callable as a C function
  */
 
 #define PTR(x) (x << 2)
-#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define PAE_PGD_ATTR (_PAGE_PRESENT)
 
 /* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
  * ~ control_page + PAGE_SIZE are used as data storage and stack for
@@ -59,117 +56,6 @@ relocate_kernel:
 	movl	%cr4, %eax
 	movl	%eax, CR4(%edi)
 
-#ifdef CONFIG_X86_PAE
-	/* map the control page at its virtual address */
-
-	movl	PTR(VA_PGD)(%ebp), %edi
-	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0xc0000000, %eax
-	shrl	$27, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_PMD_0)(%ebp), %edx
-	orl	$PAE_PGD_ATTR, %edx
-	movl	%edx, (%eax)
-
-	movl	PTR(VA_PMD_0)(%ebp), %edi
-	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0x3fe00000, %eax
-	shrl	$18, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_PTE_0)(%ebp), %edx
-	orl	$PAGE_ATTR, %edx
-	movl	%edx, (%eax)
-
-	movl	PTR(VA_PTE_0)(%ebp), %edi
-	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0x001ff000, %eax
-	shrl	$9, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
-	orl	$PAGE_ATTR, %edx
-	movl	%edx, (%eax)
-
-	/* identity map the control page at its physical address */
-
-	movl	PTR(VA_PGD)(%ebp), %edi
-	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0xc0000000, %eax
-	shrl	$27, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_PMD_1)(%ebp), %edx
-	orl	$PAE_PGD_ATTR, %edx
-	movl	%edx, (%eax)
-
-	movl	PTR(VA_PMD_1)(%ebp), %edi
-	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0x3fe00000, %eax
-	shrl	$18, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_PTE_1)(%ebp), %edx
-	orl	$PAGE_ATTR, %edx
-	movl	%edx, (%eax)
-
-	movl	PTR(VA_PTE_1)(%ebp), %edi
-	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0x001ff000, %eax
-	shrl	$9, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
-	orl	$PAGE_ATTR, %edx
-	movl	%edx, (%eax)
-#else
-	/* map the control page at its virtual address */
-
-	movl	PTR(VA_PGD)(%ebp), %edi
-	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0xffc00000, %eax
-	shrl	$20, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_PTE_0)(%ebp), %edx
-	orl	$PAGE_ATTR, %edx
-	movl	%edx, (%eax)
-
-	movl	PTR(VA_PTE_0)(%ebp), %edi
-	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0x003ff000, %eax
-	shrl	$10, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
-	orl	$PAGE_ATTR, %edx
-	movl	%edx, (%eax)
-
-	/* identity map the control page at its physical address */
-
-	movl	PTR(VA_PGD)(%ebp), %edi
-	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0xffc00000, %eax
-	shrl	$20, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_PTE_1)(%ebp), %edx
-	orl	$PAGE_ATTR, %edx
-	movl	%edx, (%eax)
-
-	movl	PTR(VA_PTE_1)(%ebp), %edi
-	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
-	andl	$0x003ff000, %eax
-	shrl	$10, %eax
-	addl	%edi, %eax
-
-	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
-	orl	$PAGE_ATTR, %edx
-	movl	%edx, (%eax)
-#endif
-
-relocate_new_kernel:
 	/* read the arguments and say goodbye to the stack */
 	movl  20+4(%esp), %ebx /* page_list */
 	movl  20+8(%esp), %ebp /* list of pages */
-- 
cgit v1.2.3-70-g09d2


From a376f30a95a796cde81d6dffde0f5243c8bd8f92 Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Fri, 31 Oct 2008 17:43:04 +0800
Subject: x86: avoid duplicate running of pud_offset and pmd_offset in
 one_md_table_init()

Impact: simplify implementation, cleanup

If !(pgd_val(*pgd) & _PAGE_PRESENT) in PAE mode, we need not get value of
pmd_table again.

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_32.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8396868e82c5..7f8a2daa3fde 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -102,6 +102,8 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
 		set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
 		pud = pud_offset(pgd, 0);
 		BUG_ON(pmd_table != pmd_offset(pud, 0));
+
+		return pmd_table;
 	}
 #endif
 	pud = pud_offset(pgd, 0);
-- 
cgit v1.2.3-70-g09d2


From b2bcc7b299f37037b4a78dc1538e5d6508ae8110 Mon Sep 17 00:00:00 2001
From: Alok Kataria <akataria@vmware.com>
Date: Fri, 31 Oct 2008 11:59:53 -0700
Subject: x86: add a synthetic TSC_RELIABLE feature bit

Impact: None, bit reservation only

Add a synthetic TSC_RELIABLE feature bit which will be used to mark
TSC as reliable so that we could skip all the runtime checks for
TSC stablity, which have false positives in virtual environment.

Signed-off-by: Alok N Kataria <akataria@vmware.com>
Signed-off-by: Dan Hecht <dhecht@vmware.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/cpufeature.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index cfdf8c2c5c31..e490a7932a0d 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -92,6 +92,7 @@
 #define X86_FEATURE_NOPL	(3*32+20) /* The NOPL (0F 1F) instructions */
 #define X86_FEATURE_AMDC1E	(3*32+21) /* AMD C1E detected */
 #define X86_FEATURE_XTOPOLOGY	(3*32+22) /* cpu topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
-- 
cgit v1.2.3-70-g09d2


From 49ab56ac6e1b907b7dadb72a4012460359feaf0e Mon Sep 17 00:00:00 2001
From: Alok Kataria <akataria@vmware.com>
Date: Sat, 1 Nov 2008 18:34:37 -0700
Subject: x86: add X86_FEATURE_HYPERVISOR feature bit

Impact: Number declaration only.

Add X86_FEATURE_HYPERVISOR bit (CPUID level 1, ECX, bit 31).

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/cpufeature.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index e490a7932a0d..694d1f8f1bee 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -118,6 +118,7 @@
 #define X86_FEATURE_XSAVE	(4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
 #define X86_FEATURE_OSXSAVE	(4*32+27) /* "" XSAVE enabled in the OS */
 #define X86_FEATURE_AVX		(4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_HYPERVISOR	(4*32+31) /* Running on a hypervisor */
 
 /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
 #define X86_FEATURE_XSTORE	(5*32+ 2) /* "rng" RNG present (xstore) */
@@ -238,6 +239,7 @@ extern const char * const x86_power_flags[32];
 #define cpu_has_xmm4_2		boot_cpu_has(X86_FEATURE_XMM4_2)
 #define cpu_has_x2apic		boot_cpu_has(X86_FEATURE_X2APIC)
 #define cpu_has_xsave		boot_cpu_has(X86_FEATURE_XSAVE)
+#define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 
 #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
 # define cpu_has_invlpg		1
-- 
cgit v1.2.3-70-g09d2


From 88b094fb8d4fe43b7025ea8d487059e8813e02cd Mon Sep 17 00:00:00 2001
From: Alok Kataria <akataria@vmware.com>
Date: Mon, 27 Oct 2008 10:41:46 -0700
Subject: x86: Hypervisor detection and get tsc_freq from hypervisor

Impact: Changes timebase calibration on Vmware.

v3->v2 : Abstract the hypervisor detection and feature (tsc_freq) request
	 behind a hypervisor.c file
v2->v1 : Add a x86_hyper_vendor field to the cpuinfo_x86 structure.
	 This avoids multiple calls to the hypervisor detection function.

This patch adds function to detect if we are running under VMware.
The current way to check if we are on VMware is following,
#  check if "hypervisor present bit" is set, if so read the 0x40000000
   cpuid leaf and check for "VMwareVMware" signature.
#  if the above fails, check the DMI vendors name for "VMware" string
   if we find one we query the VMware hypervisor port to check if we are
   under VMware.

The DMI + "VMware hypervisor port check" is needed for older VMware products,
which don't implement the hypervisor signature cpuid leaf.
Also note that since we are checking for the DMI signature the hypervisor
port should never be accessed on native hardware.

This patch also adds a hypervisor_get_tsc_freq function, instead of
calibrating the frequency which can be error prone in virtualized
environment, we ask the hypervisor for it. We get the frequency from
the hypervisor by accessing the hypervisor port if we are running on VMware.
Other hypervisors too can add code to the generic routine to get frequency on
their platform.

Signed-off-by: Alok N Kataria <akataria@vmware.com>
Signed-off-by: Dan Hecht <dhecht@vmware.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/hypervisor.h | 26 ++++++++++++
 arch/x86/include/asm/processor.h  |  4 ++
 arch/x86/include/asm/vmware.h     | 26 ++++++++++++
 arch/x86/kernel/cpu/Makefile      |  1 +
 arch/x86/kernel/cpu/common.c      |  2 +
 arch/x86/kernel/cpu/hypervisor.c  | 48 +++++++++++++++++++++
 arch/x86/kernel/cpu/vmware.c      | 88 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/setup.c           |  7 ++++
 arch/x86/kernel/tsc.c             |  9 +++-
 9 files changed, 210 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/include/asm/hypervisor.h
 create mode 100644 arch/x86/include/asm/vmware.h
 create mode 100644 arch/x86/kernel/cpu/hypervisor.c
 create mode 100644 arch/x86/kernel/cpu/vmware.c

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
new file mode 100644
index 000000000000..369f5c5d09a1
--- /dev/null
+++ b/arch/x86/include/asm/hypervisor.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2008, VMware, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#ifndef ASM_X86__HYPERVISOR_H
+#define ASM_X86__HYPERVISOR_H
+
+extern unsigned long get_hypervisor_tsc_freq(void);
+extern void init_hypervisor(struct cpuinfo_x86 *c);
+
+#endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 5ca01e383269..a570eafa4755 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -110,6 +110,7 @@ struct cpuinfo_x86 {
 	/* Index into per_cpu list: */
 	u16			cpu_index;
 #endif
+	unsigned int		x86_hyper_vendor;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define X86_VENDOR_INTEL	0
@@ -123,6 +124,9 @@ struct cpuinfo_x86 {
 
 #define X86_VENDOR_UNKNOWN	0xff
 
+#define X86_HYPER_VENDOR_NONE  0
+#define X86_HYPER_VENDOR_VMWARE 1
+
 /*
  * capabilities of CPUs
  */
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h
new file mode 100644
index 000000000000..02dfea5aebc4
--- /dev/null
+++ b/arch/x86/include/asm/vmware.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2008, VMware, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#ifndef ASM_X86__VMWARE_H
+#define ASM_X86__VMWARE_H
+
+extern unsigned long vmware_get_tsc_khz(void);
+extern int vmware_platform(void);
+
+#endif
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 82ec6075c057..a5c04e88777e 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -4,6 +4,7 @@
 
 obj-y			:= intel_cacheinfo.o addon_cpuid_features.o
 obj-y			+= proc.o capflags.o powerflags.o common.o
+obj-y			+= vmware.o hypervisor.o
 
 obj-$(CONFIG_X86_32)	+= bugs.o cmpxchg.o
 obj-$(CONFIG_X86_64)	+= bugs_64.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b9c9ea0217a9..b88595c36254 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -36,6 +36,7 @@
 #include <asm/proto.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
+#include <asm/hypervisor.h>
 
 #include "cpu.h"
 
@@ -703,6 +704,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	detect_ht(c);
 #endif
 
+	init_hypervisor(c);
 	/*
 	 * On SMP, boot_cpu_data holds the common feature set between
 	 * all CPUs; so make sure that we indicate which features are
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
new file mode 100644
index 000000000000..7bd55064ffe9
--- /dev/null
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -0,0 +1,48 @@
+/*
+ * Common hypervisor code
+ *
+ * Copyright (C) 2008, VMware, Inc.
+ * Author : Alok N Kataria <akataria@vmware.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/vmware.h>
+
+static inline void __cpuinit
+detect_hypervisor_vendor(struct cpuinfo_x86 *c)
+{
+	if (vmware_platform()) {
+		c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE;
+	} else {
+		c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE;
+	}
+}
+
+unsigned long get_hypervisor_tsc_freq(void)
+{
+	if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE)
+		return vmware_get_tsc_khz();
+	return 0;
+}
+
+void __cpuinit init_hypervisor(struct cpuinfo_x86 *c)
+{
+	detect_hypervisor_vendor(c);
+}
+
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
new file mode 100644
index 000000000000..d5d1b75a4b77
--- /dev/null
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -0,0 +1,88 @@
+/*
+ * VMware Detection code.
+ *
+ * Copyright (C) 2008, VMware, Inc.
+ * Author : Alok N Kataria <akataria@vmware.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <linux/dmi.h>
+#include <asm/div64.h>
+
+#define CPUID_VMWARE_INFO_LEAF	0x40000000
+#define VMWARE_HYPERVISOR_MAGIC	0x564D5868
+#define VMWARE_HYPERVISOR_PORT	0x5658
+
+#define VMWARE_PORT_CMD_GETVERSION	10
+#define VMWARE_PORT_CMD_GETHZ		45
+
+#define VMWARE_PORT(cmd, eax, ebx, ecx, edx)				\
+	__asm__("inl (%%dx)" :						\
+			"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :	\
+			"0"(VMWARE_HYPERVISOR_MAGIC),			\
+			"1"(VMWARE_PORT_CMD_##cmd),			\
+			"2"(VMWARE_HYPERVISOR_PORT), "3"(0) :		\
+			"memory");
+
+static inline int __vmware_platform(void)
+{
+	uint32_t eax, ebx, ecx, edx;
+	VMWARE_PORT(GETVERSION, eax, ebx, ecx, edx);
+	return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
+}
+
+static unsigned long __vmware_get_tsc_khz(void)
+{
+        uint64_t tsc_hz;
+        uint32_t eax, ebx, ecx, edx;
+
+        VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
+
+        if (eax == (uint32_t)-1)
+                return 0;
+        tsc_hz = eax | (((uint64_t)ebx) << 32);
+        do_div(tsc_hz, 1000);
+        BUG_ON(tsc_hz >> 32);
+        return tsc_hz;
+}
+
+int vmware_platform(void)
+{
+	if (cpu_has_hypervisor) {
+		unsigned int eax, ebx, ecx, edx;
+		char hyper_vendor_id[13];
+
+		cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &ebx, &ecx, &edx);
+		memcpy(hyper_vendor_id + 0, &ebx, 4);
+		memcpy(hyper_vendor_id + 4, &ecx, 4);
+		memcpy(hyper_vendor_id + 8, &edx, 4);
+		hyper_vendor_id[12] = '\0';
+		if (!strcmp(hyper_vendor_id, "VMwareVMware"))
+			return 1;
+	} else if (dmi_available && dmi_name_in_vendors("VMware") &&
+		   __vmware_platform())
+		return 1;
+
+	return 0;
+}
+
+unsigned long vmware_get_tsc_khz(void)
+{
+	BUG_ON(!vmware_platform());
+	return __vmware_get_tsc_khz();
+}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0fa6790c1dd3..f44dadfb32cf 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -98,6 +98,7 @@
 
 #include <mach_apic.h>
 #include <asm/paravirt.h>
+#include <asm/hypervisor.h>
 
 #include <asm/percpu.h>
 #include <asm/topology.h>
@@ -909,6 +910,12 @@ void __init setup_arch(char **cmdline_p)
 
 	dmi_check_system(bad_bios_dmi_table);
 
+	/*
+	 * VMware detection requires dmi to be available, so this
+	 * needs to be done after dmi_scan_machine, for the BP.
+	 */
+	init_hypervisor(&boot_cpu_data);
+
 #ifdef CONFIG_X86_32
 	probe_roms();
 #endif
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 62348e4fd8d1..6dbf0bcb44a8 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -15,6 +15,7 @@
 #include <asm/vgtod.h>
 #include <asm/time.h>
 #include <asm/delay.h>
+#include <asm/hypervisor.h>
 
 unsigned int cpu_khz;           /* TSC clocks / usec, not used here */
 EXPORT_SYMBOL(cpu_khz);
@@ -352,9 +353,15 @@ unsigned long native_calibrate_tsc(void)
 {
 	u64 tsc1, tsc2, delta, ref1, ref2;
 	unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
-	unsigned long flags, latch, ms, fast_calibrate;
+	unsigned long flags, latch, ms, fast_calibrate, tsc_khz;
 	int hpet = is_hpet_enabled(), i, loopmin;
 
+	tsc_khz = get_hypervisor_tsc_freq();
+	if (tsc_khz) {
+		printk(KERN_INFO "TSC: Frequency read from the hypervisor\n");
+		return tsc_khz;
+	}
+
 	local_irq_save(flags);
 	fast_calibrate = quick_pit_calibrate();
 	local_irq_restore(flags);
-- 
cgit v1.2.3-70-g09d2


From eca0cd028bdf0f6aaceb0d023e9c7501079a7dda Mon Sep 17 00:00:00 2001
From: Alok Kataria <akataria@vmware.com>
Date: Fri, 31 Oct 2008 12:01:58 -0700
Subject: x86: Add a synthetic TSC_RELIABLE feature bit.

Impact: Changes timebase calibration on Vmware.

Use the synthetic TSC_RELIABLE bit to workaround virtualization anomalies.

Virtual TSCs can be kept nearly in sync, but because the virtual TSC
offset is set by software, it's not perfect.  So, the TSC
synchronization test can fail. Even then the TSC can be used as a
clocksource since the VMware platform exports a reliable TSC to the
guest for timekeeping purposes. Use this bit to check if we need to
skip the TSC sync checks.

Along with this also set the CONSTANT_TSC bit when on VMware, since we
still want to use TSC as clocksource on VM running over hardware which
has unsynchronized TSC's (opteron's), since the hypervisor will take
care of providing consistent TSC to the guest.

Signed-off-by: Alok N Kataria <akataria@vmware.com>
Signed-off-by: Dan Hecht <dhecht@vmware.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/vmware.h    |  1 +
 arch/x86/kernel/cpu/hypervisor.c | 11 ++++++++++-
 arch/x86/kernel/cpu/vmware.c     | 18 ++++++++++++++++++
 arch/x86/kernel/tsc_sync.c       |  8 +++++++-
 4 files changed, 36 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h
index 02dfea5aebc4..c11b7e100d83 100644
--- a/arch/x86/include/asm/vmware.h
+++ b/arch/x86/include/asm/vmware.h
@@ -22,5 +22,6 @@
 
 extern unsigned long vmware_get_tsc_khz(void);
 extern int vmware_platform(void);
+extern void vmware_set_feature_bits(struct cpuinfo_x86 *c);
 
 #endif
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 7bd55064ffe9..35ae2b75226d 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -41,8 +41,17 @@ unsigned long get_hypervisor_tsc_freq(void)
 	return 0;
 }
 
+static inline void __cpuinit
+hypervisor_set_feature_bits(struct cpuinfo_x86 *c)
+{
+	if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) {
+		vmware_set_feature_bits(c);
+		return;
+	}
+}
+
 void __cpuinit init_hypervisor(struct cpuinfo_x86 *c)
 {
 	detect_hypervisor_vendor(c);
+	hypervisor_set_feature_bits(c);
 }
-
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index d5d1b75a4b77..2ac4394fcb90 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -86,3 +86,21 @@ unsigned long vmware_get_tsc_khz(void)
 	BUG_ON(!vmware_platform());
 	return __vmware_get_tsc_khz();
 }
+
+/*
+ * VMware hypervisor takes care of exporting a reliable TSC to the guest.
+ * Still, due to timing difference when running on virtual cpus, the TSC can
+ * be marked as unstable in some cases. For example, the TSC sync check at
+ * bootup can fail due to a marginal offset between vcpus' TSCs (though the
+ * TSCs do not drift from each other).  Also, the ACPI PM timer clocksource
+ * is not suitable as a watchdog when running on a hypervisor because the
+ * kernel may miss a wrap of the counter if the vcpu is descheduled for a
+ * long time. To skip these checks at runtime we set these capability bits,
+ * so that the kernel could just trust the hypervisor with providing a
+ * reliable virtual TSC that is suitable for timekeeping.
+ */
+void __cpuinit vmware_set_feature_bits(struct cpuinfo_x86 *c)
+{
+	set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+	set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
+}
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 9ffb01c31c40..5977c40a138f 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -108,6 +108,12 @@ void __cpuinit check_tsc_sync_source(int cpu)
 	if (unsynchronized_tsc())
 		return;
 
+	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
+		printk(KERN_INFO
+		       "Skipping synchronization checks as TSC is reliable.\n");
+		return;
+	}
+
 	printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:",
 			  smp_processor_id(), cpu);
 
@@ -161,7 +167,7 @@ void __cpuinit check_tsc_sync_target(void)
 {
 	int cpus = 2;
 
-	if (unsynchronized_tsc())
+	if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
 		return;
 
 	/*
-- 
cgit v1.2.3-70-g09d2


From 395628ef4ea12ff0748099f145363b5e33c69acb Mon Sep 17 00:00:00 2001
From: Alok Kataria <akataria@vmware.com>
Date: Fri, 24 Oct 2008 17:22:01 -0700
Subject: x86: Skip verification by the watchdog for TSC clocksource.

Impact: Changes timekeeping on Vmware (or with tsc=reliable).

This is achieved by resetting the CLOCKSOURCE_MUST_VERIFY flag.

We add a tsc=reliable commandline option to enable this.
This enables legacy hardware without HPET, LAPIC, or ACPI timers
to enter high-resolution timer mode.

Along with that have extended this to be used in virtualization environement
too. Now we also set this flag if the X86_FEATURE_TSC_RELIABLE bit is set.

This is important since there is a wrap-around problem with the acpi_pm timer.
The acpi_pm counter is just 24bits and this can overflow in ~4 seconds. With
the NO_HZ kernels in virtualized environment, there can be situations when
the guest is descheduled for longer duration, as a result we may miss the wrap
of the acpi counter. When TSC is used as a clocksource and acpi_pm timer is
being used as the watchdog clocksource this error in acpi_pm results in TSC
being marked as unstable, and essentially results in time dropping in chunks
of 4 seconds whenever this wrap is missed. Since the virtualized TSC is
reliable on VMware, we should always use the TSCs clocksource on VMware, so
we skip the verfication at runtime, by checking for the feature bit.

Since we reset the flag for mgeode systems too, i have combined
the mgeode case with the feature bit check.

Signed-off-by: Jeff Hansen <jhansen@cardaccess-inc.com>
Signed-off-by: Alok N Kataria <akataria@vmware.com>
Signed-off-by: Dan Hecht <dhecht@vmware.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 Documentation/kernel-parameters.txt |  7 +++++++
 arch/x86/kernel/tsc.c               | 33 +++++++++++++++++++++------------
 2 files changed, 28 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1bbcaa8982b6..dc6b06f67fca 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2267,6 +2267,13 @@ and is between 256 and 4096 characters. It is defined in the file
 			Format:
 			<io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq>
 
+	tsc=		Disable clocksource-must-verify flag for TSC.
+			Format: <string>
+			[x86] reliable: mark tsc clocksource as reliable, this
+			disables clocksource verification at runtime.
+			Used to enable high-resolution timer mode on older
+			hardware, and in virtualized environment.
+
 	turbografx.map[2|3]=	[HW,JOY]
 			TurboGraFX parallel port interface
 			Format:
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6dbf0bcb44a8..ee01cd96b5e1 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -32,6 +32,7 @@ static int tsc_unstable;
    erroneous rdtsc usage on !cpu_has_tsc processors */
 static int tsc_disabled = -1;
 
+static int tsc_clocksource_reliable;
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
@@ -99,6 +100,15 @@ int __init notsc_setup(char *str)
 
 __setup("notsc", notsc_setup);
 
+static int __init tsc_setup(char *str)
+{
+	if (!strcmp(str, "reliable"))
+		tsc_clocksource_reliable = 1;
+	return 1;
+}
+
+__setup("tsc=", tsc_setup);
+
 #define MAX_RETRIES     5
 #define SMI_TRESHOLD    50000
 
@@ -738,24 +748,21 @@ static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
 	{}
 };
 
-/*
- * Geode_LX - the OLPC CPU has a possibly a very reliable TSC
- */
+static void __init check_system_tsc_reliable(void)
+{
 #ifdef CONFIG_MGEODE_LX
-/* RTSC counts during suspend */
+	/* RTSC counts during suspend */
 #define RTSC_SUSP 0x100
-
-static void __init check_geode_tsc_reliable(void)
-{
 	unsigned long res_low, res_high;
 
 	rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
+	/* Geode_LX - the OLPC CPU has a possibly a very reliable TSC */
 	if (res_low & RTSC_SUSP)
-		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
-}
-#else
-static inline void check_geode_tsc_reliable(void) { }
+		tsc_clocksource_reliable = 1;
 #endif
+	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
+		tsc_clocksource_reliable = 1;
+}
 
 /*
  * Make an educated guess if the TSC is trustworthy and synchronized
@@ -790,6 +797,8 @@ static void __init init_tsc_clocksource(void)
 {
 	clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
 			clocksource_tsc.shift);
+	if (tsc_clocksource_reliable)
+		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
 	/* lower the rating if we already know its unstable: */
 	if (check_tsc_unstable()) {
 		clocksource_tsc.rating = 0;
@@ -850,7 +859,7 @@ void __init tsc_init(void)
 	if (unsynchronized_tsc())
 		mark_tsc_unstable("TSCs unsynchronized");
 
-	check_geode_tsc_reliable();
+	check_system_tsc_reliable();
 	init_tsc_clocksource();
 }
 
-- 
cgit v1.2.3-70-g09d2


From 3555105333ae55414d0fe051557bd7dc590f5255 Mon Sep 17 00:00:00 2001
From: Gary Hade <garyhade@us.ibm.com>
Date: Fri, 31 Oct 2008 10:52:03 -0700
Subject: x86: add memory hotremove config option

Impact: enable CONFIG_MEMORY_HOTREMOVE feature on x86. (default-off)

Memory hotremove functionality can currently be configured into
the ia64, powerpc, and s390 kernels.

This patch makes it possible to configure the memory hotremove
functionality into the x86 kernel as well.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Gary Hade <garyhade@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c00aefcb47d5..25e711526116 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1492,6 +1492,10 @@ config ARCH_ENABLE_MEMORY_HOTPLUG
 	def_bool y
 	depends on X86_64 || (X86_32 && HIGHMEM)
 
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+	def_bool y
+	depends on MEMORY_HOTPLUG
+
 config HAVE_ARCH_EARLY_PFN_TO_NID
 	def_bool X86_64
 	depends on NUMA
-- 
cgit v1.2.3-70-g09d2


From 6bdbfe99916398dbb28d83833cc04757110f2738 Mon Sep 17 00:00:00 2001
From: Alok Kataria <akataria@vmware.com>
Date: Mon, 3 Nov 2008 11:31:28 -0800
Subject: x86: VMware: Fix vmware_get_tsc code

Impact: Fix possible failure to calibrate the TSC on Vmware near 4 GHz

The current version of the code to get the tsc frequency from
the VMware hypervisor, will be broken on processor with frequency
(4G-1) HZ, because on such processors eax will have UINT_MAX
and that would be legitimate.
We instead check that EBX did change to decide if we were able to
read the frequency from the hypervisor.

Signed-off-by: Alok N Kataria <akataria@vmware.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/vmware.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 2ac4394fcb90..a0905ecfe7d2 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -36,7 +36,7 @@
 			"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :	\
 			"0"(VMWARE_HYPERVISOR_MAGIC),			\
 			"1"(VMWARE_PORT_CMD_##cmd),			\
-			"2"(VMWARE_HYPERVISOR_PORT), "3"(0) :		\
+			"2"(VMWARE_HYPERVISOR_PORT), "3"(UINT_MAX) :	\
 			"memory");
 
 static inline int __vmware_platform(void)
@@ -53,7 +53,7 @@ static unsigned long __vmware_get_tsc_khz(void)
 
         VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
 
-        if (eax == (uint32_t)-1)
+        if (ebx == UINT_MAX)
                 return 0;
         tsc_hz = eax | (((uint64_t)ebx) << 32);
         do_div(tsc_hz, 1000);
-- 
cgit v1.2.3-70-g09d2


From 124ffe1456d6efea5b32cc6d36e3fa434cdc84d9 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 3 Nov 2008 19:23:01 -0800
Subject: x86: signal_64: remove unused code in __setup_rt_frame()

Impact: cleanup

sizeof(*set) is always 8 on x86_64.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_64.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 83990db82f74..cfbb60a5f9d2 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -251,11 +251,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
 	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me);
 	err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
-	if (sizeof(*set) == 16) {
-		__put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
-		__put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
-	} else
-		err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
-- 
cgit v1.2.3-70-g09d2


From 6cf87efbc7a3676e0ad7c9622ec6aec244a593bc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 4 Nov 2008 10:42:23 +0100
Subject: x86 debug: mark early_printk.o as notrace

Impact: do not do function-tracing in the early-printk code

this is useful when earlyprintk=vga,keep is used to debug tracer
plugins.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e489ff9cb3e2..943fe6026c64 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -12,6 +12,7 @@ CFLAGS_REMOVE_tsc.o = -pg
 CFLAGS_REMOVE_rtc.o = -pg
 CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
 CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_early_printk.o = -pg
 endif
 
 #
-- 
cgit v1.2.3-70-g09d2


From fd8cd7e1919fc1c27fe2fdccd2a1cd32f791ef0f Mon Sep 17 00:00:00 2001
From: Alok Kataria <akataria@vmware.com>
Date: Mon, 3 Nov 2008 15:50:38 -0800
Subject: x86: vmware: look for DMI string in the product serial key

Impact: Should permit VMware detection on older platforms where the
vendor is changed.  Could theoretically cause a regression if some
weird serial number scheme contains the string "VMware" by pure
chance.  Seems unlikely, especially with the mixed case.

In some user configured cases, VMware may choose not to put a VMware specific
DMI string, but the product serial key is always there and is VMware specific.
Add a interface to check the serial key, when checking for VMware in the DMI
information.

Signed-off-by: Alok N Kataria <akataria@vmware.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/vmware.c |  7 ++++++-
 drivers/firmware/dmi_scan.c  | 11 +++++++++++
 include/linux/dmi.h          |  2 ++
 3 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index a0905ecfe7d2..c034bda842d9 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -61,6 +61,11 @@ static unsigned long __vmware_get_tsc_khz(void)
         return tsc_hz;
 }
 
+/*
+ * While checking the dmi string infomation, just checking the product
+ * serial key should be enough, as this will always have a VMware
+ * specific string when running under VMware hypervisor.
+ */
 int vmware_platform(void)
 {
 	if (cpu_has_hypervisor) {
@@ -74,7 +79,7 @@ int vmware_platform(void)
 		hyper_vendor_id[12] = '\0';
 		if (!strcmp(hyper_vendor_id, "VMwareVMware"))
 			return 1;
-	} else if (dmi_available && dmi_name_in_vendors("VMware") &&
+	} else if (dmi_available && dmi_name_in_serial("VMware") &&
 		   __vmware_platform())
 		return 1;
 
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 3e526b6d00cb..d66d41282907 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -467,6 +467,17 @@ const char *dmi_get_system_info(int field)
 }
 EXPORT_SYMBOL(dmi_get_system_info);
 
+/**
+ *	dmi_name_in_serial - 	Check if string is in the DMI product serial
+ *				information.
+ */
+int dmi_name_in_serial(const char *str)
+{
+	int f = DMI_PRODUCT_SERIAL;
+	if (dmi_ident[f] && strstr(dmi_ident[f], str))
+		return 1;
+	return 0;
+}
 
 /**
  *	dmi_name_in_vendors - Check if string is anywhere in the DMI vendor information.
diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index e5084eb5943a..2bfda178f274 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -44,6 +44,7 @@ extern const struct dmi_device * dmi_find_device(int type, const char *name,
 extern void dmi_scan_machine(void);
 extern int dmi_get_year(int field);
 extern int dmi_name_in_vendors(const char *str);
+extern int dmi_name_in_serial(const char *str);
 extern int dmi_available;
 extern int dmi_walk(void (*decode)(const struct dmi_header *));
 
@@ -56,6 +57,7 @@ static inline const struct dmi_device * dmi_find_device(int type, const char *na
 static inline void dmi_scan_machine(void) { return; }
 static inline int dmi_get_year(int year) { return 0; }
 static inline int dmi_name_in_vendors(const char *s) { return 0; }
+static inline int dmi_name_in_serial(const char *s) { return 0; }
 #define dmi_available 0
 static inline int dmi_walk(void (*decode)(const struct dmi_header *))
 	{ return -1; }
-- 
cgit v1.2.3-70-g09d2


From 838e8bb71dc0c892bf8f84abd3c709d8fe3a8d3c Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Fri, 24 Oct 2008 16:53:33 +0200
Subject: x86: Implement change_bit with immediate operand as "lock xorb"

Impact: Minor optimization.

Implement change_bit with immediate bit count as "lock xorb". This is
similar to  "lock orb" and "lock andb"  for set_bit and clear_bit
functions.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/bitops.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 360010322711..9fa9dcdf344b 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -168,7 +168,15 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
  */
 static inline void change_bit(int nr, volatile unsigned long *addr)
 {
-	asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr));
+	if (IS_IMMEDIATE(nr)) {
+		asm volatile(LOCK_PREFIX "xorb %1,%0"
+			: CONST_MASK_ADDR(nr, addr)
+			: "iq" ((u8)CONST_MASK(nr)));
+	} else {
+		asm volatile(LOCK_PREFIX "btc %1,%0"
+			: BITOP_ADDR(addr)
+			: "Ir" (nr));
+	}
 }
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 64ccf2f9a70a06ba56cd8cedfa610b4e77181587 Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@sgi.com>
Date: Wed, 5 Nov 2008 22:11:56 -0600
Subject: x86: uv: Add UV watchlist bios call

Add UV bios calls to allocate and free watchlists.

Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/uv/bios.h | 17 ++++++++++++++++-
 arch/x86/kernel/bios_uv.c      | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 51cadc645e6f..58105c5b0b4e 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -32,7 +32,9 @@
 enum uv_bios_cmd {
 	UV_BIOS_COMMON,
 	UV_BIOS_GET_SN_INFO,
-	UV_BIOS_FREQ_BASE
+	UV_BIOS_FREQ_BASE,
+	UV_BIOS_WATCHLIST_ALLOC,
+	UV_BIOS_WATCHLIST_FREE
 };
 
 /*
@@ -71,6 +73,15 @@ union partition_info_u {
 	};
 };
 
+union uv_watchlist_u {
+	u64	val;
+	struct {
+		u64	blade	: 16,
+			size	: 32,
+			filler	: 16;
+	};
+};
+
 /*
  * bios calls have 6 parameters
  */
@@ -80,9 +91,13 @@ extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
 
 extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *);
 extern s64 uv_bios_freq_base(u64, u64 *);
+extern int uv_bios_mq_watchlist_alloc(int, void *, unsigned int,
+					unsigned long *);
+extern int uv_bios_mq_watchlist_free(int, int);
 
 extern void uv_bios_init(void);
 
+extern unsigned long sn_rtc_cycles_per_second;
 extern int uv_type;
 extern long sn_partition_id;
 extern long sn_coherency_id;
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index 7cefb7170e75..4c02b2799216 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -100,6 +100,39 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
 	return ret;
 }
 
+int
+uv_bios_mq_watchlist_alloc(int blade, void *mq, unsigned int mq_size,
+			   unsigned long *intr_mmr_offset)
+{
+	union uv_watchlist_u size_blade;
+	unsigned long addr;
+	u64 watchlist;
+	s64 ret;
+
+	addr = (unsigned long)mq;
+	size_blade.size = mq_size;
+	size_blade.blade = blade;
+
+	/*
+	 * bios returns watchlist number or negative error number.
+	 */
+	ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr,
+			size_blade.val, (u64)intr_mmr_offset,
+			(u64)&watchlist, 0);
+	if (ret < BIOS_STATUS_SUCCESS)
+		return ret;
+
+	return watchlist;
+}
+EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc);
+
+int
+uv_bios_mq_watchlist_free(int blade, int watchlist_num)
+{
+	return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE,
+				blade, watchlist_num, 0, 0, 0);
+}
+EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free);
 
 s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
 {
-- 
cgit v1.2.3-70-g09d2


From e8929c8a6acbecbd629b8e3f2d1a2546ec4ebdfc Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@sgi.com>
Date: Wed, 5 Nov 2008 22:13:44 -0600
Subject: x86: uv: Add UV memory protection bios call

Add UV bios call to change memory protections.

Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/uv/bios.h | 10 +++++++++-
 arch/x86/kernel/bios_uv.c      |  8 ++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 58105c5b0b4e..a301a56d4157 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -34,7 +34,8 @@ enum uv_bios_cmd {
 	UV_BIOS_GET_SN_INFO,
 	UV_BIOS_FREQ_BASE,
 	UV_BIOS_WATCHLIST_ALLOC,
-	UV_BIOS_WATCHLIST_FREE
+	UV_BIOS_WATCHLIST_FREE,
+	UV_BIOS_MEMPROTECT
 };
 
 /*
@@ -82,6 +83,12 @@ union uv_watchlist_u {
 	};
 };
 
+enum uv_memprotect {
+	UV_MEMPROT_RESTRICT_ACCESS,
+	UV_MEMPROT_ALLOW_AMO,
+	UV_MEMPROT_ALLOW_RW
+};
+
 /*
  * bios calls have 6 parameters
  */
@@ -94,6 +101,7 @@ extern s64 uv_bios_freq_base(u64, u64 *);
 extern int uv_bios_mq_watchlist_alloc(int, void *, unsigned int,
 					unsigned long *);
 extern int uv_bios_mq_watchlist_free(int, int);
+extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect);
 
 extern void uv_bios_init(void);
 
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index 4c02b2799216..7cf6fc3d1c10 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -134,6 +134,14 @@ uv_bios_mq_watchlist_free(int blade, int watchlist_num)
 }
 EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free);
 
+s64
+uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms)
+{
+	return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len,
+					perms, 0, 0);
+}
+EXPORT_SYMBOL_GPL(uv_bios_change_memprotect);
+
 s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
 {
 	return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
-- 
cgit v1.2.3-70-g09d2


From 23c357003b3671cdfb17bc4d5383589e74b71511 Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@sgi.com>
Date: Wed, 5 Nov 2008 22:15:13 -0600
Subject: x86: uv: Add UV reserved page bios call

Add UV bios call to get the address of the reserved page.

Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/uv/bios.h |  5 ++++-
 arch/x86/kernel/bios_uv.c      | 11 +++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index a301a56d4157..da1c4e8e78fc 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -35,13 +35,15 @@ enum uv_bios_cmd {
 	UV_BIOS_FREQ_BASE,
 	UV_BIOS_WATCHLIST_ALLOC,
 	UV_BIOS_WATCHLIST_FREE,
-	UV_BIOS_MEMPROTECT
+	UV_BIOS_MEMPROTECT,
+	UV_BIOS_GET_PARTITION_ADDR
 };
 
 /*
  * Status values returned from a BIOS call.
  */
 enum {
+	BIOS_STATUS_MORE_PASSES		=  1,
 	BIOS_STATUS_SUCCESS		=  0,
 	BIOS_STATUS_UNIMPLEMENTED	= -ENOSYS,
 	BIOS_STATUS_EINVAL		= -EINVAL,
@@ -102,6 +104,7 @@ extern int uv_bios_mq_watchlist_alloc(int, void *, unsigned int,
 					unsigned long *);
 extern int uv_bios_mq_watchlist_free(int, int);
 extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect);
+extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *);
 
 extern void uv_bios_init(void);
 
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index 7cf6fc3d1c10..d22d0f1bbea0 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -142,6 +142,17 @@ uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms)
 }
 EXPORT_SYMBOL_GPL(uv_bios_change_memprotect);
 
+s64
+uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len)
+{
+	s64 ret;
+
+	ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie,
+					(u64)addr, buf, (u64)len, 0);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa);
+
 s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
 {
 	return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
-- 
cgit v1.2.3-70-g09d2


From 4b33669e817a01dd99ff91df330d504ccfb2e99c Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 5 Nov 2008 18:30:25 -0800
Subject: x86: signal_32: do save_i387_xstate() at get_sigframe()

Impact: cleanup

move calling save_i387_xstate() into get_sigframe() from setup_sigcontext()
like 64bit.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index a0efc1b3c4c9..6a05c74b4084 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -303,11 +303,7 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 	err |= __put_user(regs->sp, &sc->sp_at_signal);
 	err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
 
-	tmp = save_i387_xstate(fpstate);
-	if (tmp < 0)
-		err = 1;
-	else
-		err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
+	err |= __put_user(fpstate, &sc->fpstate);
 
 	/* non-iBCS2 extensions.. */
 	err |= __put_user(mask, &sc->oldmask);
@@ -350,6 +346,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 	if (used_math()) {
 		sp = sp - sig_xstate_size;
 		*fpstate = (struct _fpstate *) sp;
+		if (save_i387_xstate(*fpstate) < 0)
+			return (void __user *)-1L;
 	}
 
 	sp -= frame_size;
-- 
cgit v1.2.3-70-g09d2


From 99ea1b93bf80a287dd70499b96d9c4d06f320ff2 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 5 Nov 2008 18:32:54 -0800
Subject: x86: ia32_signal: do save_i387_xstate_ia32 at get_sigframe()

Impact: cleanup

move calling save_i387_xstate_ia32() into get_sigframe() from
setup_sigcontext().

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 4bc02b23674b..47ddc23f4f54 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -367,12 +367,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
 	err |= __put_user(regs->flags, &sc->flags);
 	err |= __put_user(regs->sp, &sc->sp_at_signal);
 
-	tmp = save_i387_xstate_ia32(fpstate);
-	if (tmp < 0)
-		err = -EFAULT;
-	else
-		err |= __put_user(ptr_to_compat(tmp ? fpstate : NULL),
-					&sc->fpstate);
+	err |= __put_user(ptr_to_compat(fpstate), &sc->fpstate);
 
 	/* non-iBCS2 extensions.. */
 	err |= __put_user(mask, &sc->oldmask);
@@ -408,6 +403,8 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
 	if (used_math()) {
 		sp = sp - sig_xstate_ia32_size;
 		*fpstate = (struct _fpstate_ia32 *) sp;
+		if (save_i387_xstate_ia32(*fpstate) < 0)
+			return (void __user *) -1L;
 	}
 
 	sp -= frame_size;
-- 
cgit v1.2.3-70-g09d2


From ee7d523c124a186ce3a886868de9cd1d8bc991f3 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 5 Nov 2008 18:33:35 -0800
Subject: x86: signal_64: setup fpstate in setup_sigcontext()

Impact: cleanup

set fpstate field of signal context at setup_sigcontext().

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_64.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index cfbb60a5f9d2..97d26fa62ac1 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -159,8 +159,9 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
  */
 
 static inline int
-setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
-		unsigned long mask, struct task_struct *me)
+setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
+		 struct pt_regs *regs,
+		 unsigned long mask, struct task_struct *me)
 {
 	int err = 0;
 
@@ -188,6 +189,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
 	err |= __put_user(me->thread.error_code, &sc->err);
 	err |= __put_user(regs->ip, &sc->ip);
 	err |= __put_user(regs->flags, &sc->flags);
+	err |= __put_user(fpstate, &sc->fpstate);
 	err |= __put_user(mask, &sc->oldmask);
 	err |= __put_user(me->thread.cr2, &sc->cr2);
 
@@ -249,8 +251,8 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	err |= __put_user(sas_ss_flags(regs->sp),
 			  &frame->uc.uc_stack.ss_flags);
 	err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me);
-	err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, fp,
+				regs, set->sig[0], me);
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 
 	/* Set up to return from userspace.  If provided, use a stub
-- 
cgit v1.2.3-70-g09d2


From 8735b7d0a2a6246faa406a8cdd1376bd0e689ba3 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 5 Nov 2008 18:34:35 -0800
Subject: x86: signal_64: make setup_sigcontext() similar

Impact: cleanup

remove passing task struct.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_64.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 97d26fa62ac1..3868c2a21793 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -160,8 +160,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
 
 static inline int
 setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
-		 struct pt_regs *regs,
-		 unsigned long mask, struct task_struct *me)
+		 struct pt_regs *regs, unsigned long mask)
 {
 	int err = 0;
 
@@ -185,13 +184,13 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 	err |= __put_user(regs->r13, &sc->r13);
 	err |= __put_user(regs->r14, &sc->r14);
 	err |= __put_user(regs->r15, &sc->r15);
-	err |= __put_user(me->thread.trap_no, &sc->trapno);
-	err |= __put_user(me->thread.error_code, &sc->err);
+	err |= __put_user(current->thread.trap_no, &sc->trapno);
+	err |= __put_user(current->thread.error_code, &sc->err);
 	err |= __put_user(regs->ip, &sc->ip);
 	err |= __put_user(regs->flags, &sc->flags);
 	err |= __put_user(fpstate, &sc->fpstate);
 	err |= __put_user(mask, &sc->oldmask);
-	err |= __put_user(me->thread.cr2, &sc->cr2);
+	err |= __put_user(current->thread.cr2, &sc->cr2);
 
 	return err;
 }
@@ -251,8 +250,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	err |= __put_user(sas_ss_flags(regs->sp),
 			  &frame->uc.uc_stack.ss_flags);
 	err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= setup_sigcontext(&frame->uc.uc_mcontext, fp,
-				regs, set->sig[0], me);
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 
 	/* Set up to return from userspace.  If provided, use a stub
-- 
cgit v1.2.3-70-g09d2


From fd51b2d7d5df932767b89e00d0871a38a2c53e74 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Wed, 5 Nov 2008 02:27:19 +0900
Subject: x86: update CONFIG_NUMA description

Impact: clarify/update CONFIG_NUMA text

CONFIG_NUMA description talk about a bit old thing.
So, following changes are better.

 o CONFIG_NUMA is no longer EXPERIMENTAL

 o Opteron is not the only processor of NUMA topology on x86_64 no longer,
   but also Intel Core7i has it.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 350bee1d54dc..38ae04bf6514 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -951,22 +951,26 @@ config ARCH_PHYS_ADDR_T_64BIT
 
 # Common NUMA Features
 config NUMA
-	bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
+	bool "Numa Memory Allocation and Scheduler Support"
 	depends on SMP
 	depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
 	default n if X86_PC
 	default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
 	help
 	  Enable NUMA (Non Uniform Memory Access) support.
+
 	  The kernel will try to allocate memory used by a CPU on the
 	  local memory controller of the CPU and add some more
 	  NUMA awareness to the kernel.
 
-	  For 32-bit this is currently highly experimental and should be only
-	  used for kernel development. It might also cause boot failures.
-	  For 64-bit this is recommended on all multiprocessor Opteron systems.
-	  If the system is EM64T, you should say N unless your system is
-	  EM64T NUMA.
+	  For 64-bit this is recommended if the system is Intel Core 7i
+	  (or later), AMD Opteron, or EM64T NUMA.
+
+	  For 32-bit this is only needed on (rare) 32-bit-only platforms
+	  that support NUMA topologies, such as NUMAQ / Summit, or if you
+	  boot a 32-bit kernel on a 64-bit NUMA platform.
+
+	  Otherwise, you should say N.
 
 comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
 	depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI)
-- 
cgit v1.2.3-70-g09d2


From 15002fa9bf3a79ac9dcafba7ff308586936088b2 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Fri, 7 Nov 2008 19:25:36 -0800
Subject: x86: signal: cosmetic unification of setup_sigcontext()

Impact: cleanup

Make setup_sigcontext() same.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 32 ++++++++++++++++++++++++++++----
 arch/x86/kernel/signal_64.c | 33 ++++++++++++++++++++++++++++-----
 2 files changed, 56 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 6a05c74b4084..27a5c8174322 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -279,14 +279,20 @@ static int
 setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 		 struct pt_regs *regs, unsigned long mask)
 {
-	int tmp, err = 0;
+	int err = 0;
 
-	err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
-	savesegment(gs, tmp);
-	err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
+#ifdef CONFIG_X86_32
+	{
+		unsigned int tmp;
 
+		savesegment(gs, tmp);
+		err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
+	}
+	err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
 	err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
 	err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
+#endif /* CONFIG_X86_32 */
+
 	err |= __put_user(regs->di, &sc->di);
 	err |= __put_user(regs->si, &sc->si);
 	err |= __put_user(regs->bp, &sc->bp);
@@ -295,13 +301,31 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 	err |= __put_user(regs->dx, &sc->dx);
 	err |= __put_user(regs->cx, &sc->cx);
 	err |= __put_user(regs->ax, &sc->ax);
+#ifdef CONFIG_X86_64
+	err |= __put_user(regs->r8, &sc->r8);
+	err |= __put_user(regs->r9, &sc->r9);
+	err |= __put_user(regs->r10, &sc->r10);
+	err |= __put_user(regs->r11, &sc->r11);
+	err |= __put_user(regs->r12, &sc->r12);
+	err |= __put_user(regs->r13, &sc->r13);
+	err |= __put_user(regs->r14, &sc->r14);
+	err |= __put_user(regs->r15, &sc->r15);
+#endif /* CONFIG_X86_64 */
+
 	err |= __put_user(current->thread.trap_no, &sc->trapno);
 	err |= __put_user(current->thread.error_code, &sc->err);
 	err |= __put_user(regs->ip, &sc->ip);
+#ifdef CONFIG_X86_32
 	err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
 	err |= __put_user(regs->flags, &sc->flags);
 	err |= __put_user(regs->sp, &sc->sp_at_signal);
 	err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
+#else /* !CONFIG_X86_32 */
+	err |= __put_user(regs->flags, &sc->flags);
+	err |= __put_user(regs->cs, &sc->cs);
+	err |= __put_user(0, &sc->gs);
+	err |= __put_user(0, &sc->fs);
+#endif /* CONFIG_X86_32 */
 
 	err |= __put_user(fpstate, &sc->fpstate);
 
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 3868c2a21793..d2307e41fbdb 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -157,16 +157,23 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
 /*
  * Set up a signal frame.
  */
-
-static inline int
+static int
 setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 		 struct pt_regs *regs, unsigned long mask)
 {
 	int err = 0;
 
-	err |= __put_user(regs->cs, &sc->cs);
-	err |= __put_user(0, &sc->gs);
-	err |= __put_user(0, &sc->fs);
+#ifdef CONFIG_X86_32
+	{
+		unsigned int tmp;
+
+		savesegment(gs, tmp);
+		err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
+	}
+	err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
+	err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
+	err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
+#endif /* CONFIG_X86_32 */
 
 	err |= __put_user(regs->di, &sc->di);
 	err |= __put_user(regs->si, &sc->si);
@@ -176,6 +183,7 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 	err |= __put_user(regs->dx, &sc->dx);
 	err |= __put_user(regs->cx, &sc->cx);
 	err |= __put_user(regs->ax, &sc->ax);
+#ifdef CONFIG_X86_64
 	err |= __put_user(regs->r8, &sc->r8);
 	err |= __put_user(regs->r9, &sc->r9);
 	err |= __put_user(regs->r10, &sc->r10);
@@ -184,11 +192,26 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 	err |= __put_user(regs->r13, &sc->r13);
 	err |= __put_user(regs->r14, &sc->r14);
 	err |= __put_user(regs->r15, &sc->r15);
+#endif /* CONFIG_X86_64 */
+
 	err |= __put_user(current->thread.trap_no, &sc->trapno);
 	err |= __put_user(current->thread.error_code, &sc->err);
 	err |= __put_user(regs->ip, &sc->ip);
+#ifdef CONFIG_X86_32
+	err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
 	err |= __put_user(regs->flags, &sc->flags);
+	err |= __put_user(regs->sp, &sc->sp_at_signal);
+	err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
+#else /* !CONFIG_X86_32 */
+	err |= __put_user(regs->flags, &sc->flags);
+	err |= __put_user(regs->cs, &sc->cs);
+	err |= __put_user(0, &sc->gs);
+	err |= __put_user(0, &sc->fs);
+#endif /* CONFIG_X86_32 */
+
 	err |= __put_user(fpstate, &sc->fpstate);
+
+	/* non-iBCS2 extensions.. */
 	err |= __put_user(mask, &sc->oldmask);
 	err |= __put_user(current->thread.cr2, &sc->cr2);
 
-- 
cgit v1.2.3-70-g09d2


From cb9e35dce94a1b9c59d46224e8a94377d673e204 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 8 Nov 2008 20:27:00 +0100
Subject: x86: clean up rdtsc_barrier() use

Impact: cleanup

Move rdtsc_barrier() use to vsyscall_64.c where it's relied on,
and point out its role in the context of its use.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/tsc.h    | 6 +-----
 arch/x86/kernel/vsyscall_64.c | 9 +++++++++
 2 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 9cd83a8e40d5..700aeb8d2098 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -44,11 +44,7 @@ static __always_inline cycles_t vget_cycles(void)
 	if (!cpu_has_tsc)
 		return 0;
 #endif
-	rdtsc_barrier();
-	cycles = (cycles_t)__native_read_tsc();
-	rdtsc_barrier();
-
-	return cycles;
+	return (cycles_t)__native_read_tsc();
 }
 
 extern void tsc_init(void);
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 0b8b6690a86d..ebf2f12900f5 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -128,7 +128,16 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
 			gettimeofday(tv,NULL);
 			return;
 		}
+
+		/*
+		 * Surround the RDTSC by barriers, to make sure it's not
+		 * speculated to outside the seqlock critical section and
+		 * does not cause time warps:
+		 */
+		rdtsc_barrier();
 		now = vread();
+		rdtsc_barrier();
+
 		base = __vsyscall_gtod_data.clock.cycle_last;
 		mask = __vsyscall_gtod_data.clock.mask;
 		mult = __vsyscall_gtod_data.clock.mult;
-- 
cgit v1.2.3-70-g09d2


From 4fcc50abdffb517cee36cec9cb22138d84fb62d0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 9 Nov 2008 08:10:03 +0100
Subject: x86: clean up vget_cycles()

Impact: remove unused variable

I forgot to remove the now unused "cycles_t cycles" parameter from
vget_cycles() - which triggers build warnings as tsc.h is included
in a number of files.

Remove it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/tsc.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 700aeb8d2098..38ae163cc91b 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -34,8 +34,6 @@ static inline cycles_t get_cycles(void)
 
 static __always_inline cycles_t vget_cycles(void)
 {
-	cycles_t cycles;
-
 	/*
 	 * We only do VDSOs on TSC capable CPUs, so this shouldnt
 	 * access boot_cpu_data (which is not VDSO-safe):
-- 
cgit v1.2.3-70-g09d2


From 19f47c634ea8c5a10ff7bb1a08c52fd0f49bc54c Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Sun, 9 Nov 2008 21:28:15 -0800
Subject: x86: x86_32 has its own irq_regs definition

Impact: cleanup

Arches that have their own irq_regs definition are expected to
define ARCH_HAS_OWN_IRQ_REGS or else a generic (unused) set
will also be defined in lib/irq_regs.c

Sparse noticed the unused generic one had no prototype:
lib/irq_regs.c:15:1: warning: symbol 'per_cpu____irq_regs' was not declared. Should it be static?

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/irq_regs_32.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
index af2f02d27fc7..86afd7473457 100644
--- a/arch/x86/include/asm/irq_regs_32.h
+++ b/arch/x86/include/asm/irq_regs_32.h
@@ -9,6 +9,8 @@
 
 #include <asm/percpu.h>
 
+#define ARCH_HAS_OWN_IRQ_REGS
+
 DECLARE_PER_CPU(struct pt_regs *, irq_regs);
 
 static inline struct pt_regs *get_irq_regs(void)
-- 
cgit v1.2.3-70-g09d2


From f4166c54bfe04f64603974058e44fbd7cfef0ccc Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@googlemail.com>
Date: Sun, 9 Nov 2008 14:29:21 +0100
Subject: x86, bts: DS and BTS initialization

Impact: widen BTS/PEBS ptrace enablement to more CPU models

Move BTS initialisation out of an #ifdef CONFIG_X86_64 guard.

Assume core2 BTS and DS layout for future models of family 6 processors.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/intel.c | 3 +--
 arch/x86/kernel/ds.c        | 9 ++++-----
 arch/x86/kernel/ptrace.c    | 9 ++++-----
 3 files changed, 9 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cce0b6118d55..816f27f289b1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -307,12 +307,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_P4);
 	if (c->x86 == 6)
 		set_cpu_cap(c, X86_FEATURE_P3);
+#endif
 
 	if (cpu_has_bts)
 		ptrace_bts_init_intel(c);
 
-#endif
-
 	detect_extended_topology(c);
 	if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
 		/*
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 2b69994fd3a8..c570252905a1 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -821,17 +821,16 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 	switch (c->x86) {
 	case 0x6:
 		switch (c->x86_model) {
+		case 0 ... 0xC:
+			/* sorry, don't know about them */
+			break;
 		case 0xD:
 		case 0xE: /* Pentium M */
 			ds_configure(&ds_cfg_var);
 			break;
-		case 0xF: /* Core2 */
-		case 0x1C: /* Atom */
+		default: /* Core2, Atom, ... */
 			ds_configure(&ds_cfg_64);
 			break;
-		default:
-			/* sorry, don't know about them */
-			break;
 		}
 		break;
 	case 0xF:
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a6d8c12e10d..06180dff5b2e 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -929,17 +929,16 @@ void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
 	switch (c->x86) {
 	case 0x6:
 		switch (c->x86_model) {
+		case 0 ... 0xC:
+			/* sorry, don't know about them */
+			break;
 		case 0xD:
 		case 0xE: /* Pentium M */
 			bts_configure(&bts_cfg_pentium_m);
 			break;
-		case 0xF: /* Core2 */
-        case 0x1C: /* Atom */
+		default: /* Core2, Atom, ... */
 			bts_configure(&bts_cfg_core2);
 			break;
-		default:
-			/* sorry, don't know about them */
-			break;
 		}
 		break;
 	case 0xF:
-- 
cgit v1.2.3-70-g09d2


From 4e0304310f5180eee11b4edc72cf4cb78acdc634 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Mon, 10 Nov 2008 09:16:40 +0100
Subject: x86: apic - calibrate_APIC_clock remove redundant irq-enable-disable

Impact: cleanup

lapic_timer_setup is self-protected with local_irq_save/restore
no need to use them in caller and levt is the per-cpu variable so
no concurrent access from another cpu.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Acked-by: "Maciej W. Rozycki" <macro@linux-mips.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 04a7f960bbc0..ce90dc184139 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -672,13 +672,9 @@ static int __init calibrate_APIC_clock(void)
 		while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
 			cpu_relax();
 
-		local_irq_disable();
-
 		/* Stop the lapic timer */
 		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
 
-		local_irq_enable();
-
 		/* Jiffies delta */
 		deltaj = lapic_cal_j2 - lapic_cal_j1;
 		apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
-- 
cgit v1.2.3-70-g09d2


From ba21ebb6abac5c46e1d818d2ceda82420bd099ba Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Mon, 10 Nov 2008 09:16:41 +0100
Subject: x86: apic - use pr_ macros for logging

Impact: cleanup

It saves us some source lines and shift the code a bit righter.

And a multiline comment style is fixed too :-)

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Acked-by: "Maciej W. Rozycki" <macro@linux-mips.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c | 116 ++++++++++++++++++++++---------------------------
 1 file changed, 53 insertions(+), 63 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index ce90dc184139..70879c9e3936 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -559,13 +559,13 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta)
 	} else {
 		res = (((u64)deltapm) *  mult) >> 22;
 		do_div(res, 1000000);
-		printk(KERN_WARNING "APIC calibration not consistent "
+		pr_warning("APIC calibration not consistent "
 			"with PM Timer: %ldms instead of 100ms\n",
 			(long)res);
 		/* Correct the lapic counter value */
 		res = (((u64)(*delta)) * pm_100ms);
 		do_div(res, deltapm);
-		printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
+		pr_info("APIC delta adjusted to PM-Timer: "
 			"%lu (%ld)\n", (unsigned long)res, *delta);
 		*delta = (long)res;
 	}
@@ -645,8 +645,7 @@ static int __init calibrate_APIC_clock(void)
 	 */
 	if (calibration_result < (1000000 / HZ)) {
 		local_irq_enable();
-		printk(KERN_WARNING
-		       "APIC frequency too slow, disabling apic timer\n");
+		pr_warning("APIC frequency too slow, disabling apic timer\n");
 		return -1;
 	}
 
@@ -688,8 +687,7 @@ static int __init calibrate_APIC_clock(void)
 		local_irq_enable();
 
 	if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
-		printk(KERN_WARNING
-		       "APIC timer disabled due to verification failure.\n");
+		pr_warning("APIC timer disabled due to verification failure.\n");
 			return -1;
 	}
 
@@ -710,7 +708,7 @@ void __init setup_boot_APIC_clock(void)
 	 * broadcast mechanism is used. On UP systems simply ignore it.
 	 */
 	if (disable_apic_timer) {
-		printk(KERN_INFO "Disabling APIC timer\n");
+		pr_info("Disabling APIC timer\n");
 		/* No broadcast on UP ! */
 		if (num_possible_cpus() > 1) {
 			lapic_clockevent.mult = 1;
@@ -737,7 +735,7 @@ void __init setup_boot_APIC_clock(void)
 	if (nmi_watchdog != NMI_IO_APIC)
 		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
 	else
-		printk(KERN_WARNING "APIC timer registered as dummy,"
+		pr_warning("APIC timer registered as dummy,"
 			" due to nmi_watchdog=%d!\n", nmi_watchdog);
 
 	/* Setup the lapic or request the broadcast */
@@ -769,8 +767,7 @@ static void local_apic_timer_interrupt(void)
 	 * spurious.
 	 */
 	if (!evt->event_handler) {
-		printk(KERN_WARNING
-		       "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
+		pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu);
 		/* Switch it off */
 		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
 		return;
@@ -1089,7 +1086,7 @@ static void __cpuinit lapic_setup_esr(void)
 	unsigned int oldvalue, value, maxlvt;
 
 	if (!lapic_is_integrated()) {
-		printk(KERN_INFO "No ESR for 82489DX.\n");
+		pr_info("No ESR for 82489DX.\n");
 		return;
 	}
 
@@ -1100,7 +1097,7 @@ static void __cpuinit lapic_setup_esr(void)
 		 * ESR disabled - we can't do anything useful with the
 		 * errors anyway - mbligh
 		 */
-		printk(KERN_INFO "Leaving ESR disabled.\n");
+		pr_info("Leaving ESR disabled.\n");
 		return;
 	}
 
@@ -1294,7 +1291,7 @@ void check_x2apic(void)
 	rdmsr(MSR_IA32_APICBASE, msr, msr2);
 
 	if (msr & X2APIC_ENABLE) {
-		printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+		pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
 		x2apic_preenabled = x2apic = 1;
 		apic_ops = &x2apic_ops;
 	}
@@ -1306,7 +1303,7 @@ void enable_x2apic(void)
 
 	rdmsr(MSR_IA32_APICBASE, msr, msr2);
 	if (!(msr & X2APIC_ENABLE)) {
-		printk("Enabling x2apic\n");
+		pr_info("Enabling x2apic\n");
 		wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
 	}
 }
@@ -1321,9 +1318,8 @@ void enable_IR_x2apic(void)
 		return;
 
 	if (!x2apic_preenabled && disable_x2apic) {
-		printk(KERN_INFO
-		       "Skipped enabling x2apic and Interrupt-remapping "
-		       "because of nox2apic\n");
+		pr_info("Skipped enabling x2apic and Interrupt-remapping "
+			"because of nox2apic\n");
 		return;
 	}
 
@@ -1331,22 +1327,19 @@ void enable_IR_x2apic(void)
 		panic("Bios already enabled x2apic, can't enforce nox2apic");
 
 	if (!x2apic_preenabled && skip_ioapic_setup) {
-		printk(KERN_INFO
-		       "Skipped enabling x2apic and Interrupt-remapping "
-		       "because of skipping io-apic setup\n");
+		pr_info("Skipped enabling x2apic and Interrupt-remapping "
+			"because of skipping io-apic setup\n");
 		return;
 	}
 
 	ret = dmar_table_init();
 	if (ret) {
-		printk(KERN_INFO
-		       "dmar_table_init() failed with %d:\n", ret);
+		pr_info("dmar_table_init() failed with %d:\n", ret);
 
 		if (x2apic_preenabled)
 			panic("x2apic enabled by bios. But IR enabling failed");
 		else
-			printk(KERN_INFO
-			       "Not enabling x2apic,Intr-remapping\n");
+			pr_info("Not enabling x2apic,Intr-remapping\n");
 		return;
 	}
 
@@ -1355,7 +1348,7 @@ void enable_IR_x2apic(void)
 
 	ret = save_mask_IO_APIC_setup();
 	if (ret) {
-		printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret);
+		pr_info("Saving IO-APIC state failed: %d\n", ret);
 		goto end;
 	}
 
@@ -1390,14 +1383,11 @@ end:
 
 	if (!ret) {
 		if (!x2apic_preenabled)
-			printk(KERN_INFO
-			       "Enabled x2apic and interrupt-remapping\n");
+			pr_info("Enabled x2apic and interrupt-remapping\n");
 		else
-			printk(KERN_INFO
-			       "Enabled Interrupt-remapping\n");
+			pr_info("Enabled Interrupt-remapping\n");
 	} else
-		printk(KERN_ERR
-		       "Failed to enable Interrupt-remapping and x2apic\n");
+		pr_err("Failed to enable Interrupt-remapping and x2apic\n");
 #else
 	if (!cpu_has_x2apic)
 		return;
@@ -1406,8 +1396,8 @@ end:
 		panic("x2apic enabled prior OS handover,"
 		      " enable CONFIG_INTR_REMAP");
 
-	printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
-	       " and x2apic\n");
+	pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+		" and x2apic\n");
 #endif
 
 	return;
@@ -1424,7 +1414,7 @@ end:
 static int __init detect_init_APIC(void)
 {
 	if (!cpu_has_apic) {
-		printk(KERN_INFO "No local APIC present\n");
+		pr_info("No local APIC present\n");
 		return -1;
 	}
 
@@ -1465,8 +1455,8 @@ static int __init detect_init_APIC(void)
 		 * "lapic" specified.
 		 */
 		if (!force_enable_local_apic) {
-			printk(KERN_INFO "Local APIC disabled by BIOS -- "
-			       "you can enable it with \"lapic\"\n");
+			pr_info("Local APIC disabled by BIOS -- "
+				"you can enable it with \"lapic\"\n");
 			return -1;
 		}
 		/*
@@ -1476,8 +1466,7 @@ static int __init detect_init_APIC(void)
 		 */
 		rdmsr(MSR_IA32_APICBASE, l, h);
 		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
-			printk(KERN_INFO
-			       "Local APIC disabled by BIOS -- reenabling.\n");
+			pr_info("Local APIC disabled by BIOS -- reenabling.\n");
 			l &= ~MSR_IA32_APICBASE_BASE;
 			l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
 			wrmsr(MSR_IA32_APICBASE, l, h);
@@ -1490,7 +1479,7 @@ static int __init detect_init_APIC(void)
 	 */
 	features = cpuid_edx(1);
 	if (!(features & (1 << X86_FEATURE_APIC))) {
-		printk(KERN_WARNING "Could not enable APIC!\n");
+		pr_warning("Could not enable APIC!\n");
 		return -1;
 	}
 	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
@@ -1501,14 +1490,14 @@ static int __init detect_init_APIC(void)
 	if (l & MSR_IA32_APICBASE_ENABLE)
 		mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
 
-	printk(KERN_INFO "Found and enabled local APIC!\n");
+	pr_info("Found and enabled local APIC!\n");
 
 	apic_pm_activate();
 
 	return 0;
 
 no_apic:
-	printk(KERN_INFO "No local APIC present or hardware disabled\n");
+	pr_info("No local APIC present or hardware disabled\n");
 	return -1;
 }
 #endif
@@ -1584,12 +1573,12 @@ int __init APIC_init_uniprocessor(void)
 {
 #ifdef CONFIG_X86_64
 	if (disable_apic) {
-		printk(KERN_INFO "Apic disabled\n");
+		pr_info("Apic disabled\n");
 		return -1;
 	}
 	if (!cpu_has_apic) {
 		disable_apic = 1;
-		printk(KERN_INFO "Apic disabled by BIOS\n");
+		pr_info("Apic disabled by BIOS\n");
 		return -1;
 	}
 #else
@@ -1601,8 +1590,8 @@ int __init APIC_init_uniprocessor(void)
 	 */
 	if (!cpu_has_apic &&
 	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
-		printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n",
-		       boot_cpu_physical_apicid);
+		pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
+			boot_cpu_physical_apicid);
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
 		return -1;
 	}
@@ -1695,8 +1684,8 @@ void smp_spurious_interrupt(struct pt_regs *regs)
 	add_pda(irq_spurious_count, 1);
 #else
 	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
-	printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
-	       "should never happen.\n", smp_processor_id());
+	pr_info("spurious APIC interrupt on CPU#%d, "
+		"should never happen.\n", smp_processor_id());
 	__get_cpu_var(irq_stat).irq_spurious_count++;
 #endif
 	irq_exit();
@@ -1720,17 +1709,18 @@ void smp_error_interrupt(struct pt_regs *regs)
 	ack_APIC_irq();
 	atomic_inc(&irq_err_count);
 
-	/* Here is what the APIC error bits mean:
-	   0: Send CS error
-	   1: Receive CS error
-	   2: Send accept error
-	   3: Receive accept error
-	   4: Reserved
-	   5: Send illegal vector
-	   6: Received illegal vector
-	   7: Illegal register address
-	*/
-	printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
+	/*
+	 * Here is what the APIC error bits mean:
+	 * 0: Send CS error
+	 * 1: Receive CS error
+	 * 2: Send accept error
+	 * 3: Receive accept error
+	 * 4: Reserved
+	 * 5: Send illegal vector
+	 * 6: Received illegal vector
+	 * 7: Illegal register address
+	 */
+	pr_debug("APIC error on CPU%d: %02x(%02x)\n",
 		smp_processor_id(), v , v1);
 	irq_exit();
 }
@@ -1834,15 +1824,15 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	 * Validate version
 	 */
 	if (version == 0x0) {
-		printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
-				"fixing up to 0x10. (tell your hw vendor)\n",
-				version);
+		pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
+			"fixing up to 0x10. (tell your hw vendor)\n",
+			version);
 		version = 0x10;
 	}
 	apic_version[apicid] = version;
 
 	if (num_processors >= NR_CPUS) {
-		printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+		pr_warning("WARNING: NR_CPUS limit of %i reached."
 			"  Processor ignored.\n", NR_CPUS);
 		return;
 	}
@@ -2205,7 +2195,7 @@ static int __init apic_set_verbosity(char *arg)
 	else if (strcmp("verbose", arg) == 0)
 		apic_verbosity = APIC_VERBOSE;
 	else {
-		printk(KERN_WARNING "APIC Verbosity level %s not recognised"
+		pr_warning("APIC Verbosity level %s not recognised"
 			" use apic=verbose or apic=debug\n", arg);
 		return -EINVAL;
 	}
-- 
cgit v1.2.3-70-g09d2


From a3d732f93785da17e0137210deadb4616f5536fc Mon Sep 17 00:00:00 2001
From: Cliff Wickman <cpw@sgi.com>
Date: Mon, 10 Nov 2008 16:16:31 -0600
Subject: x86, UV: fix redundant creation of sgi_uv

Impact: fix double entry creation in /proc

There is a collision between two UV functions:
  both uv_ptc_init() and gru_proc_init() try to make /proc/sgi_uv

So move it's creation to a single place: uv_system_init()

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/genx2apic_uv_x.c | 2 ++
 arch/x86/kernel/tlb_uv.c         | 4 ----
 drivers/misc/sgi-gru/gruprocfs.c | 1 -
 3 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 85fb7dd48f67..d7213a1cb784 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -20,6 +20,7 @@
 #include <linux/module.h>
 #include <linux/hardirq.h>
 #include <linux/timer.h>
+#include <linux/proc_fs.h>
 #include <asm/current.h>
 #include <asm/smp.h>
 #include <asm/ipi.h>
@@ -570,4 +571,5 @@ void __init uv_system_init(void)
 
 	uv_cpu_init();
 	uv_scir_register_cpu_notifier();
+	proc_mkdir("sgi_uv", NULL);
 }
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 04431f34fd16..6a00e5faaa74 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -566,14 +566,10 @@ static int __init uv_ptc_init(void)
 	if (!is_uv_system())
 		return 0;
 
-	if (!proc_mkdir("sgi_uv", NULL))
-		return -EINVAL;
-
 	proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL);
 	if (!proc_uv_ptc) {
 		printk(KERN_ERR "unable to create %s proc entry\n",
 		       UV_PTC_BASENAME);
-		remove_proc_entry("sgi_uv", NULL);
 		return -EINVAL;
 	}
 	proc_uv_ptc->proc_fops = &proc_uv_ptc_operations;
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
index 533923f83f1a..73b0ca061bb5 100644
--- a/drivers/misc/sgi-gru/gruprocfs.c
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -317,7 +317,6 @@ int gru_proc_init(void)
 {
 	struct proc_entry *p;
 
-	proc_mkdir("sgi_uv", NULL);
 	proc_gru = proc_mkdir("sgi_uv/gru", NULL);
 
 	for (p = proc_files; p->name; p++)
-- 
cgit v1.2.3-70-g09d2


From d3ec5cae0921611ceae06464ef6291012dd9849f Mon Sep 17 00:00:00 2001
From: Ivan Vecera <ivecera@redhat.com>
Date: Tue, 11 Nov 2008 14:33:44 +0100
Subject: x86: call machine_shutdown and stop all CPUs in native_machine_halt

Impact: really halt all CPUs on halt

Function machine_halt (resp. native_machine_halt) is empty for x86
architectures. When command 'halt -f' is invoked, the message "System
halted." is displayed but this is not really true because all CPUs are
still running.

There are also similar inconsistencies for other arches (some uses
power-off for halt or forever-loop with IRQs enabled/disabled).

IMO there should be used the same approach for all architectures OR
what does the message "System halted" really mean?

This patch fixes it for x86.

Signed-off-by: Ivan Vecera <ivecera@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/apic.h   |  1 +
 arch/x86/include/asm/system.h |  2 ++
 arch/x86/kernel/process.c     | 16 ++++++++++++++++
 arch/x86/kernel/reboot.c      |  5 +++++
 arch/x86/kernel/smp.c         | 13 -------------
 5 files changed, 24 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3b1510b4fc57..25caa0738af5 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -193,6 +193,7 @@ extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask);
 static inline void lapic_shutdown(void) { }
 #define local_apic_timer_c2_ok		1
 static inline void init_apic_mappings(void) { }
+static inline void disable_local_APIC(void) { }
 
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 2ed3f0f44ff7..07c3e4048991 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -314,6 +314,8 @@ extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
 
 void default_idle(void);
 
+void stop_this_cpu(void *dummy);
+
 /*
  * Force strict CPU ordering.
  * And yes, this is required on UP too when we're talking
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c622772744d8..a4da7c4b3129 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -8,6 +8,7 @@
 #include <linux/pm.h>
 #include <linux/clockchips.h>
 #include <asm/system.h>
+#include <asm/apic.h>
 
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
@@ -122,6 +123,21 @@ void default_idle(void)
 EXPORT_SYMBOL(default_idle);
 #endif
 
+void stop_this_cpu(void *dummy)
+{
+	local_irq_disable();
+	/*
+	 * Remove this CPU:
+	 */
+	cpu_clear(smp_processor_id(), cpu_online_map);
+	disable_local_APIC();
+
+	for (;;) {
+		if (hlt_works(smp_processor_id()))
+			halt();
+	}
+}
+
 static void do_nothing(void *unused)
 {
 }
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 724adfc63cb9..34f8d37ae3c5 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -461,6 +461,11 @@ static void native_machine_restart(char *__unused)
 
 static void native_machine_halt(void)
 {
+	/* stop other cpus and apics */
+	machine_shutdown();
+
+	/* stop this cpu */
+	stop_this_cpu(NULL);
 }
 
 static void native_machine_power_off(void)
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 18f9b19f5f8f..3f92b134ab90 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -140,19 +140,6 @@ void native_send_call_func_ipi(cpumask_t mask)
 		send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
 }
 
-static void stop_this_cpu(void *dummy)
-{
-	local_irq_disable();
-	/*
-	 * Remove this CPU:
-	 */
-	cpu_clear(smp_processor_id(), cpu_online_map);
-	disable_local_APIC();
-	if (hlt_works(smp_processor_id()))
-		for (;;) halt();
-	for (;;);
-}
-
 /*
  * this function calls the 'stop' function on all other CPUs in the system.
  */
-- 
cgit v1.2.3-70-g09d2


From a98f8fd24fb24fcb9a359553e64dd6aac5cf4279 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 6 Nov 2008 01:13:39 +0100
Subject: x86: apic reset counter on shutdown

Impact: avoid spurious lapic timer events on shutdown

The apic timer might be close to firing when it is shutdown. We can
not really disable the timer - we just mask the interrupt. That way we
can get an extra interrupt when it is reenabled. Set the counter to
max on shutdown to avoid this.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 70879c9e3936..1d410ee4b064 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -441,6 +441,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
 		v = apic_read(APIC_LVTT);
 		v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
 		apic_write(APIC_LVTT, v);
+		apic_write(APIC_TMICT, 0xffffffff);
 		break;
 	case CLOCK_EVT_MODE_RESUME:
 		/* Nothing to do here */
-- 
cgit v1.2.3-70-g09d2


From c280ea5e4c6ba0b38ed6b005150fe16a660e903b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 8 Nov 2008 13:29:45 +0100
Subject: x86: fix documentation typo in arch/x86/Kconfig

Impact: documentation update

Chris Snook pointed out that it's Core i7, not Core 7i.

Reported-by: Chris Snook <csnook@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 38ae04bf6514..bacac556b189 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -963,7 +963,7 @@ config NUMA
 	  local memory controller of the CPU and add some more
 	  NUMA awareness to the kernel.
 
-	  For 64-bit this is recommended if the system is Intel Core 7i
+	  For 64-bit this is recommended if the system is Intel Core i7
 	  (or later), AMD Opteron, or EM64T NUMA.
 
 	  For 32-bit this is only needed on (rare) 32-bit-only platforms
-- 
cgit v1.2.3-70-g09d2


From 4687518c4cb7807fbeff21770e309080f9eb7f2f Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Tue, 11 Nov 2008 13:03:07 -0800
Subject: x86: 32 bit: interrupt stub consistency with 64 bit

Don't generate interrupt stubs for interrupt vectors below
FIRST_EXTERNAL_VECTOR, and make the table of interrupt vectors
(interrupt[]) __initconst.  Both of these changes both conserve memory
and improve consistency with 64 bits.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/hw_irq.h | 2 +-
 arch/x86/kernel/entry_32.S    | 6 +++---
 arch/x86/kernel/irqinit_32.c  | 2 +-
 arch/x86/lguest/boot.c        | 3 ++-
 4 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b97aecb0b61d..27d33f92afe2 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -110,7 +110,7 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
 #ifdef CONFIG_X86_32
-extern void (*const interrupt[NR_VECTORS])(void);
+extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
 #endif
 
 typedef int vector_irq_t[NR_VECTORS];
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 28b597ef9ca1..4aea95652cff 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -622,16 +622,16 @@ END(syscall_badsys)
  * Build the entry stubs and pointer table with
  * some assembler magic.
  */
-.section .rodata,"a"
+.section .init.rodata,"a"
 ENTRY(interrupt)
 .text
 
 ENTRY(irq_entries_start)
 	RING0_INT_FRAME
-vector=0
+vector=FIRST_EXTERNAL_VECTOR
 .rept NR_VECTORS
 	ALIGN
- .if vector
+ .if vector != FIRST_EXTERNAL_VECTOR
 	CFI_ADJUST_CFA_OFFSET -4
  .endif
 1:	pushl $~(vector)
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 845aa9803e80..607db63044a5 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -129,7 +129,7 @@ void __init native_init_IRQ(void)
 	for (i =  FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
 		/* SYSCALL_VECTOR was reserved in trap_init. */
 		if (i != SYSCALL_VECTOR)
-			set_intr_gate(i, interrupt[i]);
+			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
 	}
 
 
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a5d8e1ace1cf..50a779264bb1 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -590,7 +590,8 @@ static void __init lguest_init_IRQ(void)
 		 * a straightforward 1 to 1 mapping, so force that here. */
 		__get_cpu_var(vector_irq)[vector] = i;
 		if (vector != SYSCALL_VECTOR) {
-			set_intr_gate(vector, interrupt[vector]);
+			set_intr_gate(vector,
+				      interrupt[vector-FIRST_EXTERNAL_VECTOR]);
 			set_irq_chip_and_handler_name(i, &lguest_irq_controller,
 						      handle_level_irq,
 						      "level");
-- 
cgit v1.2.3-70-g09d2


From b7c6244f13d37592003b46e12500a90e9781ad9d Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Tue, 11 Nov 2008 13:24:58 -0800
Subject: x86: 32 bits: shrink and align IRQ stubs

Shrink the IRQ stubs on 32 bits down to just over four bytes per (we
fit seven into a 32-byte chunk.)  This shrinks the total icache
consumption of the IRQ stubs down to an even kilobyte, if all of them
are in active use.

The downside is that we end up with a double jump, which could have a
negative effect on some pipelines.  The double jump is always inside
the same cacheline on any modern chips (the exception being
486/Elan/Geode which have only 16-byte cachelines, but are unlikely to
have too many interrupt sources.)

To get the most effect, cache-align the IRQ stubs.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/entry_32.S | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 4aea95652cff..dae81b9fd451 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -619,28 +619,37 @@ END(syscall_badsys)
 27:;
 
 /*
- * Build the entry stubs and pointer table with
- * some assembler magic.
+ * Build the entry stubs and pointer table with some assembler magic.
+ * We pack 7 stubs into a single 32-byte chunk, which will fit in a
+ * single cache line on all modern x86 implementations.
  */
 .section .init.rodata,"a"
 ENTRY(interrupt)
 .text
-
+	.p2align 5
+	.p2align CONFIG_X86_L1_CACHE_SHIFT
 ENTRY(irq_entries_start)
 	RING0_INT_FRAME
 vector=FIRST_EXTERNAL_VECTOR
-.rept NR_VECTORS
-	ALIGN
- .if vector != FIRST_EXTERNAL_VECTOR
+.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
+	.balign 32
+  .rept	7
+    .if vector < NR_VECTORS
+      .if vector != FIRST_EXTERNAL_VECTOR
 	CFI_ADJUST_CFA_OFFSET -4
- .endif
-1:	pushl $~(vector)
+      .endif
+1:	pushl $(~vector+0x80)	/* Note: always in signed byte range */
 	CFI_ADJUST_CFA_OFFSET 4
-	jmp common_interrupt
- .previous
+      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) != 6
+	jmp 2f
+      .endif
+      .previous
 	.long 1b
- .text
+      .text
 vector=vector+1
+    .endif
+  .endr
+2:	jmp common_interrupt
 .endr
 END(irq_entries_start)
 
@@ -652,8 +661,9 @@ END(interrupt)
  * the CPU automatically disables interrupts when executing an IRQ vector,
  * so IRQ-flags tracing has to follow that:
  */
-	ALIGN
+	.p2align CONFIG_X86_L1_CACHE_SHIFT
 common_interrupt:
+	addl $-0x80,(%esp)	/* Adjust vector into the [-256,-1] range */
 	SAVE_ALL
 	TRACE_IRQS_OFF
 	movl %esp,%eax
-- 
cgit v1.2.3-70-g09d2


From 939b787130bf22887a09d8fd2641a094dcef8c22 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Tue, 11 Nov 2008 13:51:52 -0800
Subject: x86: 64 bits: shrink and align IRQ stubs

Move the IRQ stub generation to assembly to simplify it and for
consistency with 32 bits.  Doing it in a C file with asm() statements
doesn't help clarity, and it prevents some optimizations.

Shrink the IRQ stubs down to just over four bytes per (we fit seven
into a 32-byte chunk.)  This shrinks the total icache consumption of
the IRQ stubs down to an even kilobyte, if all of them are in active
use.

The downside is that we end up with a double jump, which could have a
negative effect on some pipelines.  The double jump is always inside
the same cacheline on any modern chips.

To get the most effect, cache-align the IRQ stubs.

This makes the 64-bit code match changes already done to the 32-bit
code, and should open up irqinit*.c for unification.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/hw_irq.h |  2 --
 arch/x86/kernel/entry_64.S    | 48 +++++++++++++++++++++++++++++--
 arch/x86/kernel/irqinit_64.c  | 66 -------------------------------------------
 3 files changed, 45 insertions(+), 71 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 27d33f92afe2..8de644b6b959 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -109,9 +109,7 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 #endif
 
-#ifdef CONFIG_X86_32
 extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
-#endif
 
 typedef int vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b86f332c96a6..9b2aeaac9a6b 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -627,6 +627,46 @@ END(stub_rt_sigreturn)
    vector already pushed) */
 #define XCPT_FRAME _frame ORIG_RAX
 
+/*
+ * Build the entry stubs and pointer table with some assembler magic.
+ * We pack 7 stubs into a single 32-byte chunk, which will fit in a
+ * single cache line on all modern x86 implementations.
+ */
+	.section .init.rodata,"a"
+ENTRY(interrupt)
+	.text
+	.p2align 5
+	.p2align CONFIG_X86_L1_CACHE_SHIFT
+ENTRY(irq_entries_start)
+	INTR_FRAME
+vector=FIRST_EXTERNAL_VECTOR
+.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
+	.balign 32
+  .rept	7
+    .if vector < NR_VECTORS
+      .if vector != FIRST_EXTERNAL_VECTOR
+	CFI_ADJUST_CFA_OFFSET -8
+      .endif
+1:	pushq $(~vector+0x80)	/* Note: always in signed byte range */
+	CFI_ADJUST_CFA_OFFSET 8
+      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) != 6
+	jmp 2f
+      .endif
+      .previous
+	.quad 1b
+      .text
+vector=vector+1
+    .endif
+  .endr
+2:	jmp common_interrupt
+.endr
+	CFI_ENDPROC
+END(irq_entries_start)
+
+.previous
+END(interrupt)
+.previous
+
 /* 
  * Interrupt entry/exit.
  *
@@ -635,11 +675,12 @@ END(stub_rt_sigreturn)
  * Entry runs with interrupts off.	
  */ 
 
-/* 0(%rsp): interrupt number */ 
+/* 0(%rsp): ~(interrupt number)+0x80 */ 
 	.macro interrupt func
+	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
 	cld
 	SAVE_ARGS
-	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
+	leaq -ARGOFFSET(%rsp),%rdi	/* arg1 for handler */
 	pushq %rbp
 	/*
 	 * Save rbp twice: One is for marking the stack frame, as usual, and the
@@ -670,7 +711,8 @@ END(stub_rt_sigreturn)
 	call \func
 	.endm
 
-ENTRY(common_interrupt)
+	.p2align CONFIG_X86_L1_CACHE_SHIFT
+common_interrupt:
 	XCPT_FRAME
 	interrupt do_IRQ
 	/* 0(%rsp): oldrsp-ARGOFFSET */
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index ff0235391285..8670b3ce626e 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -23,41 +23,6 @@
 #include <asm/apic.h>
 #include <asm/i8259.h>
 
-/*
- * Common place to define all x86 IRQ vectors
- *
- * This builds up the IRQ handler stubs using some ugly macros in irq.h
- *
- * These macros create the low-level assembly IRQ routines that save
- * register context and call do_IRQ(). do_IRQ() then does all the
- * operations that are needed to keep the AT (or SMP IOAPIC)
- * interrupt-controller happy.
- */
-
-#define IRQ_NAME2(nr) nr##_interrupt(void)
-#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
-
-/*
- *	SMP has a few special interrupts for IPI messages
- */
-
-#define BUILD_IRQ(nr)				\
-	asmlinkage void IRQ_NAME(nr);		\
-	asm("\n.text\n.p2align\n"		\
-	    "IRQ" #nr "_interrupt:\n\t"		\
-	    "push $~(" #nr ") ; "		\
-	    "jmp common_interrupt\n"		\
-	    ".previous");
-
-#define BI(x,y) \
-	BUILD_IRQ(x##y)
-
-#define BUILD_16_IRQS(x) \
-	BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
-	BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
-	BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
-	BI(x,c) BI(x,d) BI(x,e) BI(x,f)
-
 /*
  * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
  * (these are usually mapped to vectors 0x30-0x3f)
@@ -73,37 +38,6 @@
  *
  * (these are usually mapped into the 0x30-0xff vector range)
  */
-				      BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
-BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
-BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
-BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
-
-#undef BUILD_16_IRQS
-#undef BI
-
-
-#define IRQ(x,y) \
-	IRQ##x##y##_interrupt
-
-#define IRQLIST_16(x) \
-	IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
-	IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
-	IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
-	IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
-
-/* for the irq vectors */
-static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
-					  IRQLIST_16(0x2), IRQLIST_16(0x3),
-	IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
-	IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
-	IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
-};
-
-#undef IRQ
-#undef IRQLIST_16
-
-
-
 
 /*
  * IRQ2 is cascade interrupt to second interrupt controller
-- 
cgit v1.2.3-70-g09d2


From 14d7ca5c575853664d8fe4f225a77b8df1b7de7d Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Tue, 11 Nov 2008 16:19:48 -0800
Subject: x86: attempt reboot via port CF9 if we have standard PCI ports

Impact: Changes reboot behavior.

If port CF9 seems to be safe to touch, attempt it before trying the
keyboard controller.  Port CF9 is not available on all chipsets (a
significant but decreasing number of modern chipsets don't implement
it), but port CF9 itself should in general be safe to poke (no ill
effects if unimplemented) on any system which has PCI Configuration
Method #1 or #2, as it falls inside the PCI configuration port range
in both cases.  No chipset without PCI is known to have port CF9,
either, although an explicit "pci=bios" would mean we miss this and
therefore don't use port CF9.  An explicit "reboot=pci" can be used to
force the use of port CF9.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/emergency-restart.h |  4 +++-
 arch/x86/kernel/reboot.c                 | 34 +++++++++++++++++++++++++-------
 arch/x86/pci/direct.c                    |  4 +++-
 arch/x86/pci/pci.h                       |  1 +
 4 files changed, 34 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h
index 94826cf87455..cc70c1c78ca4 100644
--- a/arch/x86/include/asm/emergency-restart.h
+++ b/arch/x86/include/asm/emergency-restart.h
@@ -8,7 +8,9 @@ enum reboot_type {
 	BOOT_BIOS = 'b',
 #endif
 	BOOT_ACPI = 'a',
-	BOOT_EFI = 'e'
+	BOOT_EFI = 'e',
+	BOOT_CF9 = 'p',
+	BOOT_CF9_COND = 'q',
 };
 
 extern enum reboot_type reboot_type;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 34f8d37ae3c5..ddc93891cdcd 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -29,14 +29,17 @@ EXPORT_SYMBOL(pm_power_off);
 
 static const struct desc_ptr no_idt = {};
 static int reboot_mode;
-enum reboot_type reboot_type = BOOT_KBD;
+enum reboot_type reboot_type = BOOT_CF9_COND;
 int reboot_force;
 
 #if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
 static int reboot_cpu = -1;
 #endif
 
-/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old]
+/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
+bool port_cf9_safe = false;
+
+/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
    warm   Don't set the cold reboot flag
    cold   Set the cold reboot flag
    bios   Reboot by jumping through the BIOS (only for X86_32)
@@ -45,6 +48,7 @@ static int reboot_cpu = -1;
    kbd    Use the keyboard controller. cold reset (default)
    acpi   Use the RESET_REG in the FADT
    efi    Use efi reset_system runtime service
+   pci    Use the so-called "PCI reset register", CF9
    force  Avoid anything that could hang.
  */
 static int __init reboot_setup(char *str)
@@ -79,6 +83,7 @@ static int __init reboot_setup(char *str)
 		case 'k':
 		case 't':
 		case 'e':
+		case 'p':
 			reboot_type = *str;
 			break;
 
@@ -379,28 +384,43 @@ static void native_machine_emergency_restart(void)
 			load_idt(&no_idt);
 			__asm__ __volatile__("int3");
 
-			reboot_type = BOOT_KBD;
+			reboot_type = BOOT_CF9_COND;
 			break;
 
 #ifdef CONFIG_X86_32
 		case BOOT_BIOS:
 			machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
 
-			reboot_type = BOOT_KBD;
+			reboot_type = BOOT_CF9_COND;
 			break;
 #endif
 
 		case BOOT_ACPI:
 			acpi_reboot();
-			reboot_type = BOOT_KBD;
+			reboot_type = BOOT_CF9_COND;
 			break;
 
-
 		case BOOT_EFI:
 			if (efi_enabled)
-				efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD,
+				efi.reset_system(reboot_mode ?
+						 EFI_RESET_WARM :
+						 EFI_RESET_COLD,
 						 EFI_SUCCESS, 0, NULL);
+			reboot_type = BOOT_CF9_COND;
+			break;
+
+		case BOOT_CF9:
+			port_cf9_safe = true;
+			/* fall through */
 
+		case BOOT_CF9_COND:
+			if (port_cf9_safe) {
+				u8 cf9 = inb(0xcf9) & ~6;
+				outb(cf9|2, 0xcf9); /* Request hard reset */
+				udelay(50);
+				outb(cf9|6, 0xcf9); /* Actually do the reset */
+				udelay(50);
+			}
 			reboot_type = BOOT_KBD;
 			break;
 		}
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 9915293500fb..9a5af6c8fbe9 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -173,7 +173,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus,
 
 #undef PCI_CONF2_ADDRESS
 
-static struct pci_raw_ops pci_direct_conf2 = {
+struct pci_raw_ops pci_direct_conf2 = {
 	.read =		pci_conf2_read,
 	.write =	pci_conf2_write,
 };
@@ -289,6 +289,7 @@ int __init pci_direct_probe(void)
 
 	if (pci_check_type1()) {
 		raw_pci_ops = &pci_direct_conf1;
+		port_cf9_safe = true;
 		return 1;
 	}
 	release_resource(region);
@@ -305,6 +306,7 @@ int __init pci_direct_probe(void)
 
 	if (pci_check_type2()) {
 		raw_pci_ops = &pci_direct_conf2;
+		port_cf9_safe = true;
 		return 2;
 	}
 
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
index 15b9cf6be729..1959018aac02 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -96,6 +96,7 @@ extern struct pci_raw_ops *raw_pci_ops;
 extern struct pci_raw_ops *raw_pci_ext_ops;
 
 extern struct pci_raw_ops pci_direct_conf1;
+extern bool port_cf9_safe;
 
 /* arch_initcall level */
 extern int pci_direct_probe(void);
-- 
cgit v1.2.3-70-g09d2


From 4a61204856e8b28e9f5489a7875cb3a60afd1e43 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Tue, 11 Nov 2008 19:09:29 -0800
Subject: x86: signal_32: introduce retcode and rt_retcode

Impact: cleanup

Introduce retcode and rt_retcode to replace setting up frame->retcode.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 27a5c8174322..514171ac0d03 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -45,6 +45,28 @@
 # define FIX_EFLAGS	__FIX_EFLAGS
 #endif
 
+static const struct {
+	u16 poplmovl;
+	u32 val;
+	u16 int80;
+} __attribute__((packed)) retcode = {
+	0xb858,		/* popl %eax; movl $..., %eax */
+	__NR_sigreturn,
+	0x80cd,		/* int $0x80 */
+};
+
+static const struct {
+	u8  movl;
+	u32 val;
+	u16 int80;
+	u8  pad;
+} __attribute__((packed)) rt_retcode = {
+	0xb8,		/* movl $..., %eax */
+	__NR_rt_sigreturn,
+	0x80cd,		/* int $0x80 */
+	0
+};
+
 /*
  * Atomically swap in the new signal mask, and wait for a signal.
  */
@@ -427,9 +449,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
 	 * reasons and because gdb uses it as a signature to notice
 	 * signal handler stack frames.
 	 */
-	err |= __put_user(0xb858, (short __user *)(frame->retcode+0));
-	err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2));
-	err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
+	err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode);
 
 	if (err)
 		return -EFAULT;
@@ -498,9 +518,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	 * reasons and because gdb uses it as a signature to notice
 	 * signal handler stack frames.
 	 */
-	err |= __put_user(0xb8, (char __user *)(frame->retcode+0));
-	err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1));
-	err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
+	err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
 
 	if (err)
 		return -EFAULT;
-- 
cgit v1.2.3-70-g09d2


From 9cc3c49ed1b1dbf6e50de4055a6773bd162f24b7 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Tue, 11 Nov 2008 19:11:39 -0800
Subject: x86: ia32_signal: remove unnecessary padding

Impact: reduce structure padding

Remove unnecessary paddings, this saves 4 bytes.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 47ddc23f4f54..e2d0bc779bf7 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -427,12 +427,10 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 		u16 poplmovl;
 		u32 val;
 		u16 int80;
-		u16 pad;
 	} __attribute__((packed)) code = {
 		0xb858,		 /* popl %eax ; movl $...,%eax */
 		__NR_ia32_sigreturn,
 		0x80cd,		/* int $0x80 */
-		0,
 	};
 
 	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
@@ -508,8 +506,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		u8 movl;
 		u32 val;
 		u16 int80;
-		u16 pad;
-		u8  pad2;
+		u8  pad;
 	} __attribute__((packed)) code = {
 		0xb8,
 		__NR_ia32_rt_sigreturn,
-- 
cgit v1.2.3-70-g09d2


From a7d41820f683c35b53af719210a51f6aa0f86a6a Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 12 Nov 2008 11:34:37 -0200
Subject: x86 kdump: extract kdump-specific code from crash_nmi_callback()

Impact: cleanup

The NMI CPU-halting code will be used on non-kdump cases, also
(e.g. emergency_reboot when virtualization is enabled).

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/crash.c | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 268553817909..60475422a51a 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -35,19 +35,34 @@ static int crashing_cpu;
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 static atomic_t waiting_for_crash_ipi;
 
-static int crash_nmi_callback(struct notifier_block *self,
-			unsigned long val, void *data)
+static void kdump_nmi_callback(int cpu, struct die_args *args)
 {
 	struct pt_regs *regs;
 #ifdef CONFIG_X86_32
 	struct pt_regs fixed_regs;
 #endif
+
+	regs = args->regs;
+
+#ifdef CONFIG_X86_32
+	if (!user_mode_vm(regs)) {
+		crash_fixup_ss_esp(&fixed_regs, regs);
+		regs = &fixed_regs;
+	}
+#endif
+	crash_save_cpu(regs, cpu);
+
+	disable_local_APIC();
+}
+
+static int crash_nmi_callback(struct notifier_block *self,
+			unsigned long val, void *data)
+{
 	int cpu;
 
 	if (val != DIE_NMI_IPI)
 		return NOTIFY_OK;
 
-	regs = ((struct die_args *)data)->regs;
 	cpu = raw_smp_processor_id();
 
 	/* Don't do anything if this handler is invoked on crashing cpu.
@@ -58,14 +73,8 @@ static int crash_nmi_callback(struct notifier_block *self,
 		return NOTIFY_STOP;
 	local_irq_disable();
 
-#ifdef CONFIG_X86_32
-	if (!user_mode_vm(regs)) {
-		crash_fixup_ss_esp(&fixed_regs, regs);
-		regs = &fixed_regs;
-	}
-#endif
-	crash_save_cpu(regs, cpu);
-	disable_local_APIC();
+	kdump_nmi_callback(cpu, (struct die_args *)data);
+
 	atomic_dec(&waiting_for_crash_ipi);
 	/* Assume hlt works */
 	halt();
-- 
cgit v1.2.3-70-g09d2


From b2bbe71b829564fb65a6bc7e1e25e02d70cffce8 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 12 Nov 2008 11:34:38 -0200
Subject: x86 kdump: move crashing_cpu assignment to nmi_shootdown_cpus()

Impact: cleanup

This variable will be moved to non-kdump-specific code.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/crash.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 60475422a51a..ed2f0f9dc894 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -29,10 +29,11 @@
 
 #include <mach_ipi.h>
 
+#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
+
 /* This keeps a track of which one is crashing cpu. */
 static int crashing_cpu;
 
-#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 static atomic_t waiting_for_crash_ipi;
 
 static void kdump_nmi_callback(int cpu, struct die_args *args)
@@ -100,6 +101,9 @@ static void nmi_shootdown_cpus(void)
 {
 	unsigned long msecs;
 
+	/* Make a note of crashing cpu. Will be used in NMI callback.*/
+	crashing_cpu = safe_smp_processor_id();
+
 	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
 	/* Would it be better to replace the trap vector here? */
 	if (register_die_notifier(&crash_nmi_nb))
@@ -140,8 +144,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 	/* The kernel is broken so disable interrupts */
 	local_irq_disable();
 
-	/* Make a note of crashing cpu. Will be used in NMI callback.*/
-	crashing_cpu = safe_smp_processor_id();
 	nmi_shootdown_cpus();
 	lapic_shutdown();
 #if defined(CONFIG_X86_IO_APIC)
-- 
cgit v1.2.3-70-g09d2


From d1e7b91cfaa8fc5ed736dcfb8beb5134a2385228 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 12 Nov 2008 11:34:39 -0200
Subject: x86 kdump: create kdump_nmi_shootdown_cpus()

Impact: cleanup

For the kdump-specific code that was living on nmi_shootdown_cpus().

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/crash.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index ed2f0f9dc894..75c468cc7e59 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -122,10 +122,17 @@ static void nmi_shootdown_cpus(void)
 	}
 
 	/* Leave the nmi callback set */
+}
+
+static void kdump_nmi_shootdown_cpus(void)
+{
+	nmi_shootdown_cpus();
+
 	disable_local_APIC();
 }
+
 #else
-static void nmi_shootdown_cpus(void)
+static void kdump_nmi_shootdown_cpus(void)
 {
 	/* There are no cpus to shootdown */
 }
@@ -144,7 +151,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 	/* The kernel is broken so disable interrupts */
 	local_irq_disable();
 
-	nmi_shootdown_cpus();
+	kdump_nmi_shootdown_cpus();
 	lapic_shutdown();
 #if defined(CONFIG_X86_IO_APIC)
 	disable_IO_APIC();
-- 
cgit v1.2.3-70-g09d2


From 8e294786316aca41c66b8b73ba1ee74a4ae7d452 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 12 Nov 2008 11:34:40 -0200
Subject: x86 kdump: make kdump_nmi_callback() a function ptr on
 crash_nmi_callback()

Impact: extend nmi_shootdown_cpus() with a callback

The reboot code will use a different function on crash_nmi_callback().
Adding a function pointer parameter to nmi_shootdown_cpus() for that.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/crash.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 75c468cc7e59..f23c2beeb37d 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -29,10 +29,13 @@
 
 #include <mach_ipi.h>
 
+typedef void (*nmi_shootdown_cb)(int, struct die_args*);
+
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 
 /* This keeps a track of which one is crashing cpu. */
 static int crashing_cpu;
+static nmi_shootdown_cb shootdown_callback;
 
 static atomic_t waiting_for_crash_ipi;
 
@@ -74,7 +77,7 @@ static int crash_nmi_callback(struct notifier_block *self,
 		return NOTIFY_STOP;
 	local_irq_disable();
 
-	kdump_nmi_callback(cpu, (struct die_args *)data);
+	shootdown_callback(cpu, (struct die_args *)data);
 
 	atomic_dec(&waiting_for_crash_ipi);
 	/* Assume hlt works */
@@ -97,13 +100,15 @@ static struct notifier_block crash_nmi_nb = {
 	.notifier_call = crash_nmi_callback,
 };
 
-static void nmi_shootdown_cpus(void)
+static void nmi_shootdown_cpus(nmi_shootdown_cb callback)
 {
 	unsigned long msecs;
 
 	/* Make a note of crashing cpu. Will be used in NMI callback.*/
 	crashing_cpu = safe_smp_processor_id();
 
+	shootdown_callback = callback;
+
 	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
 	/* Would it be better to replace the trap vector here? */
 	if (register_die_notifier(&crash_nmi_nb))
@@ -126,7 +131,7 @@ static void nmi_shootdown_cpus(void)
 
 static void kdump_nmi_shootdown_cpus(void)
 {
-	nmi_shootdown_cpus();
+	nmi_shootdown_cpus(kdump_nmi_callback);
 
 	disable_local_APIC();
 }
-- 
cgit v1.2.3-70-g09d2


From c370e5e089adfd5b1b863f3464cccae9ebf33cca Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 12 Nov 2008 11:34:41 -0200
Subject: x86 kdump: make nmi_shootdown_cpus() non-static

Impact: make API available to the rest of x86 platform code

Add prototype to asm/reboot.h.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/reboot.h | 5 +++++
 arch/x86/kernel/crash.c       | 3 +--
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index df7710354f85..562d4fd31ba8 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_REBOOT_H
 #define _ASM_X86_REBOOT_H
 
+#include <linux/kdebug.h>
+
 struct pt_regs;
 
 struct machine_ops {
@@ -18,4 +20,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs);
 void native_machine_shutdown(void);
 void machine_real_restart(const unsigned char *code, int length);
 
+typedef void (*nmi_shootdown_cb)(int, struct die_args*);
+void nmi_shootdown_cpus(nmi_shootdown_cb callback);
+
 #endif /* _ASM_X86_REBOOT_H */
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index f23c2beeb37d..fb298d1daac9 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -29,7 +29,6 @@
 
 #include <mach_ipi.h>
 
-typedef void (*nmi_shootdown_cb)(int, struct die_args*);
 
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 
@@ -100,7 +99,7 @@ static struct notifier_block crash_nmi_nb = {
 	.notifier_call = crash_nmi_callback,
 };
 
-static void nmi_shootdown_cpus(nmi_shootdown_cb callback)
+void nmi_shootdown_cpus(nmi_shootdown_cb callback)
 {
 	unsigned long msecs;
 
-- 
cgit v1.2.3-70-g09d2


From 2ddded213895e41b9cfe1c084127e6c01632ac1a Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 12 Nov 2008 11:34:42 -0200
Subject: x86: move nmi_shootdown_cpus() to reboot.c

Impact: make nmi_shootdown_cpus() available to the rest of the x86 platform

Now nmi_shootdown_cpus() is ready to be used by non-kdump code also.
Move it to reboot.c.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/crash.c  | 76 --------------------------------------------
 arch/x86/kernel/reboot.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+), 76 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index fb298d1daac9..d84a852e4cd7 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -32,12 +32,6 @@
 
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 
-/* This keeps a track of which one is crashing cpu. */
-static int crashing_cpu;
-static nmi_shootdown_cb shootdown_callback;
-
-static atomic_t waiting_for_crash_ipi;
-
 static void kdump_nmi_callback(int cpu, struct die_args *args)
 {
 	struct pt_regs *regs;
@@ -58,76 +52,6 @@ static void kdump_nmi_callback(int cpu, struct die_args *args)
 	disable_local_APIC();
 }
 
-static int crash_nmi_callback(struct notifier_block *self,
-			unsigned long val, void *data)
-{
-	int cpu;
-
-	if (val != DIE_NMI_IPI)
-		return NOTIFY_OK;
-
-	cpu = raw_smp_processor_id();
-
-	/* Don't do anything if this handler is invoked on crashing cpu.
-	 * Otherwise, system will completely hang. Crashing cpu can get
-	 * an NMI if system was initially booted with nmi_watchdog parameter.
-	 */
-	if (cpu == crashing_cpu)
-		return NOTIFY_STOP;
-	local_irq_disable();
-
-	shootdown_callback(cpu, (struct die_args *)data);
-
-	atomic_dec(&waiting_for_crash_ipi);
-	/* Assume hlt works */
-	halt();
-	for (;;)
-		cpu_relax();
-
-	return 1;
-}
-
-static void smp_send_nmi_allbutself(void)
-{
-	cpumask_t mask = cpu_online_map;
-	cpu_clear(safe_smp_processor_id(), mask);
-	if (!cpus_empty(mask))
-		send_IPI_mask(mask, NMI_VECTOR);
-}
-
-static struct notifier_block crash_nmi_nb = {
-	.notifier_call = crash_nmi_callback,
-};
-
-void nmi_shootdown_cpus(nmi_shootdown_cb callback)
-{
-	unsigned long msecs;
-
-	/* Make a note of crashing cpu. Will be used in NMI callback.*/
-	crashing_cpu = safe_smp_processor_id();
-
-	shootdown_callback = callback;
-
-	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
-	/* Would it be better to replace the trap vector here? */
-	if (register_die_notifier(&crash_nmi_nb))
-		return;		/* return what? */
-	/* Ensure the new callback function is set before sending
-	 * out the NMI
-	 */
-	wmb();
-
-	smp_send_nmi_allbutself();
-
-	msecs = 1000; /* Wait at most a second for the other cpus to stop */
-	while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
-		mdelay(1);
-		msecs--;
-	}
-
-	/* Leave the nmi callback set */
-}
-
 static void kdump_nmi_shootdown_cpus(void)
 {
 	nmi_shootdown_cpus(kdump_nmi_callback);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 724adfc63cb9..364edeecc235 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -21,6 +21,9 @@
 # include <asm/iommu.h>
 #endif
 
+#include <mach_ipi.h>
+
+
 /*
  * Power off function, if any
  */
@@ -514,3 +517,83 @@ void machine_crash_shutdown(struct pt_regs *regs)
 	machine_ops.crash_shutdown(regs);
 }
 #endif
+
+
+#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
+
+/* This keeps a track of which one is crashing cpu. */
+static int crashing_cpu;
+static nmi_shootdown_cb shootdown_callback;
+
+static atomic_t waiting_for_crash_ipi;
+
+static int crash_nmi_callback(struct notifier_block *self,
+			unsigned long val, void *data)
+{
+	int cpu;
+
+	if (val != DIE_NMI_IPI)
+		return NOTIFY_OK;
+
+	cpu = raw_smp_processor_id();
+
+	/* Don't do anything if this handler is invoked on crashing cpu.
+	 * Otherwise, system will completely hang. Crashing cpu can get
+	 * an NMI if system was initially booted with nmi_watchdog parameter.
+	 */
+	if (cpu == crashing_cpu)
+		return NOTIFY_STOP;
+	local_irq_disable();
+
+	shootdown_callback(cpu, (struct die_args *)data);
+
+	atomic_dec(&waiting_for_crash_ipi);
+	/* Assume hlt works */
+	halt();
+	for (;;)
+		cpu_relax();
+
+	return 1;
+}
+
+static void smp_send_nmi_allbutself(void)
+{
+	cpumask_t mask = cpu_online_map;
+	cpu_clear(safe_smp_processor_id(), mask);
+	if (!cpus_empty(mask))
+		send_IPI_mask(mask, NMI_VECTOR);
+}
+
+static struct notifier_block crash_nmi_nb = {
+	.notifier_call = crash_nmi_callback,
+};
+
+void nmi_shootdown_cpus(nmi_shootdown_cb callback)
+{
+	unsigned long msecs;
+
+	/* Make a note of crashing cpu. Will be used in NMI callback.*/
+	crashing_cpu = safe_smp_processor_id();
+
+	shootdown_callback = callback;
+
+	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+	/* Would it be better to replace the trap vector here? */
+	if (register_die_notifier(&crash_nmi_nb))
+		return;		/* return what? */
+	/* Ensure the new callback function is set before sending
+	 * out the NMI
+	 */
+	wmb();
+
+	smp_send_nmi_allbutself();
+
+	msecs = 1000; /* Wait at most a second for the other cpus to stop */
+	while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
+		mdelay(1);
+		msecs--;
+	}
+
+	/* Leave the nmi callback set */
+}
+#endif
-- 
cgit v1.2.3-70-g09d2


From bb8dd270e62217e2d2172094c6c352c4ddc0a127 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 12 Nov 2008 11:34:43 -0200
Subject: x86: make nmi_shootdown_cpus() available on !SMP and !X86_LOCAL_APIC

Impact: widen nmi_shootdown_cpus() availability

The X86_LOCAL_APIC #ifdef was for kdump. For !SMP, the function simply
does nothing.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/reboot.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 364edeecc235..17a41e055565 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -519,7 +519,7 @@ void machine_crash_shutdown(struct pt_regs *regs)
 #endif
 
 
-#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
+#if defined(CONFIG_SMP)
 
 /* This keeps a track of which one is crashing cpu. */
 static int crashing_cpu;
@@ -568,6 +568,12 @@ static struct notifier_block crash_nmi_nb = {
 	.notifier_call = crash_nmi_callback,
 };
 
+/* Halt all other CPUs, calling the specified function on each of them
+ *
+ * This function can be used to halt all other CPUs on crash
+ * or emergency reboot time. The function passed as parameter
+ * will be called inside a NMI handler on all CPUs.
+ */
 void nmi_shootdown_cpus(nmi_shootdown_cb callback)
 {
 	unsigned long msecs;
@@ -596,4 +602,9 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
 
 	/* Leave the nmi callback set */
 }
+#else /* !CONFIG_SMP */
+void nmi_shootdown_cpus(nmi_shootdown_cb callback)
+{
+	/* No other CPUs to shoot down */
+}
 #endif
-- 
cgit v1.2.3-70-g09d2


From c415b3dce30dfb41234e118662e8720f47343a4f Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 12 Nov 2008 11:34:44 -0200
Subject: x86: disable IRQs before doing anything on nmi_shootdown_cpus()

Impact: make nmi_shootdown_cpus() callable from preemptible context

We need to know on which CPU we are running on, and we don't want to be
preempted while doing this.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/reboot.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 17a41e055565..c3cd512484e5 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -577,6 +577,7 @@ static struct notifier_block crash_nmi_nb = {
 void nmi_shootdown_cpus(nmi_shootdown_cb callback)
 {
 	unsigned long msecs;
+	local_irq_disable();
 
 	/* Make a note of crashing cpu. Will be used in NMI callback.*/
 	crashing_cpu = safe_smp_processor_id();
-- 
cgit v1.2.3-70-g09d2


From 8665596ec05498525014436520b316ba174a068a Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 12 Nov 2008 10:27:35 -0800
Subject: x86: fix up the new IRQ code for older versions of gas

Older versions of gas don't implement the C-style != operator, they
instead want the Pascal-style <> operator.  Change != to <> so we
don't break compilation with those old versions of gas.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/entry_32.S | 4 ++--
 arch/x86/kernel/entry_64.S | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index dae81b9fd451..bd02ec77edc4 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -635,12 +635,12 @@ vector=FIRST_EXTERNAL_VECTOR
 	.balign 32
   .rept	7
     .if vector < NR_VECTORS
-      .if vector != FIRST_EXTERNAL_VECTOR
+      .if vector <> FIRST_EXTERNAL_VECTOR
 	CFI_ADJUST_CFA_OFFSET -4
       .endif
 1:	pushl $(~vector+0x80)	/* Note: always in signed byte range */
 	CFI_ADJUST_CFA_OFFSET 4
-      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) != 6
+      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
 	jmp 2f
       .endif
       .previous
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 9b2aeaac9a6b..2b42362a85b2 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -644,12 +644,12 @@ vector=FIRST_EXTERNAL_VECTOR
 	.balign 32
   .rept	7
     .if vector < NR_VECTORS
-      .if vector != FIRST_EXTERNAL_VECTOR
+      .if vector <> FIRST_EXTERNAL_VECTOR
 	CFI_ADJUST_CFA_OFFSET -8
       .endif
 1:	pushq $(~vector+0x80)	/* Note: always in signed byte range */
 	CFI_ADJUST_CFA_OFFSET 8
-      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) != 6
+      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
 	jmp 2f
       .endif
       .previous
-- 
cgit v1.2.3-70-g09d2


From 8652cb4b0d87accbe78725fd2a13be2787059649 Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Wed, 12 Nov 2008 13:35:00 -0500
Subject: x86: warn of incorrect cpu_khz on AMD systems

Impact: add debug check

If none of the perfctrs are free when calculating cpu_khz we default to
using ctr 3 (ie, we just choose 3).  This may lead to an incorrect tsc
freq value which can cause the system to be unstable.

To aid in future debugging, WARN the user of a potential problem.

Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/time_64.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index cb19d650c216..418a095c5796 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -80,6 +80,8 @@ unsigned long __init calibrate_cpu(void)
 			break;
 	no_ctr_free = (i == 4);
 	if (no_ctr_free) {
+		WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... "
+		     "cpu_khz value may be incorrect.\n");
 		i = 3;
 		rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
 		wrmsrl(MSR_K7_EVNTSEL3, 0);
-- 
cgit v1.2.3-70-g09d2


From 722024dbb74f3ea316c285c0a71a4512e113b0c4 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Thu, 13 Nov 2008 13:50:20 +0100
Subject: x86: irq: fix apicinterrupts on 64 bits

Impact: Fix interrupt via the apicinterrupt macro

Checkin 939b787130bf22887a09d8fd2641a094dcef8c22 changed the
"interrupt" macro, but the "interrupt" macro is also invoked
indirectly from the "apicinterrupt" macro.

The "apicinterrupt" macro probably should have its own collection of
systematic stubs for the same reason the main IRQ code does; as is it
is a huge amount of replicated code.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/entry_64.S | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 2b42362a85b2..369de6973c58 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -675,9 +675,8 @@ END(interrupt)
  * Entry runs with interrupts off.	
  */ 
 
-/* 0(%rsp): ~(interrupt number)+0x80 */ 
+/* 0(%rsp): ~(interrupt number) */
 	.macro interrupt func
-	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
 	cld
 	SAVE_ARGS
 	leaq -ARGOFFSET(%rsp),%rdi	/* arg1 for handler */
@@ -711,9 +710,14 @@ END(interrupt)
 	call \func
 	.endm
 
+	/*
+	 * The interrupt stubs push (~vector+0x80) onto the stack and
+	 * then jump to common_interrupt.
+	 */
 	.p2align CONFIG_X86_L1_CACHE_SHIFT
 common_interrupt:
 	XCPT_FRAME
+	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
 	interrupt do_IRQ
 	/* 0(%rsp): oldrsp-ARGOFFSET */
 ret_from_intr:
-- 
cgit v1.2.3-70-g09d2


From 0bd7b79851d0f74b24a9ce87d088f2e7c718f668 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@fastmail.fm>
Date: Sun, 16 Nov 2008 15:29:00 +0100
Subject: x86: entry_64.S: remove whitespace at end of lines

Impact: cleanup

All blame goes to: color white,red "[^[:graph:]]+$"
in .nanorc ;).

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 190 ++++++++++++++++++++++-----------------------
 1 file changed, 95 insertions(+), 95 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b86f332c96a6..54927784bab9 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -11,15 +11,15 @@
  *
  * NOTE: This code handles signal-recognition, which happens every time
  * after an interrupt and after each system call.
- * 
- * Normal syscalls and interrupts don't save a full stack frame, this is 
+ *
+ * Normal syscalls and interrupts don't save a full stack frame, this is
  * only done for syscall tracing, signals or fork/exec et.al.
- * 
- * A note on terminology:	 
- * - top of stack: Architecture defined interrupt frame from SS to RIP 
- * at the top of the kernel process stack.	
+ *
+ * A note on terminology:
+ * - top of stack: Architecture defined interrupt frame from SS to RIP
+ * at the top of the kernel process stack.
  * - partial stack frame: partially saved registers upto R11.
- * - full stack frame: Like partial stack frame, but all register saved. 
+ * - full stack frame: Like partial stack frame, but all register saved.
  *
  * Some macro usage:
  * - CFI macros are used to generate dwarf2 unwind information for better
@@ -142,7 +142,7 @@ END(mcount)
 
 #ifndef CONFIG_PREEMPT
 #define retint_kernel retint_restore_args
-#endif	
+#endif
 
 #ifdef CONFIG_PARAVIRT
 ENTRY(native_usergs_sysret64)
@@ -161,14 +161,14 @@ ENTRY(native_usergs_sysret64)
 .endm
 
 /*
- * C code is not supposed to know about undefined top of stack. Every time 
- * a C function with an pt_regs argument is called from the SYSCALL based 
+ * C code is not supposed to know about undefined top of stack. Every time
+ * a C function with an pt_regs argument is called from the SYSCALL based
  * fast path FIXUP_TOP_OF_STACK is needed.
  * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
  * manipulation.
- */        	
-		
-	/* %rsp:at FRAMEEND */ 
+ */
+
+	/* %rsp:at FRAMEEND */
 	.macro FIXUP_TOP_OF_STACK tmp
 	movq	%gs:pda_oldrsp,\tmp
 	movq  	\tmp,RSP(%rsp)
@@ -244,8 +244,8 @@ ENTRY(native_usergs_sysret64)
 	.endm
 /*
  * A newly forked process directly context switches into this.
- */ 	
-/* rdi:	prev */	
+ */
+/* rdi:	prev */
 ENTRY(ret_from_fork)
 	CFI_DEFAULT_STACK
 	push kernel_eflags(%rip)
@@ -256,7 +256,7 @@ ENTRY(ret_from_fork)
 	GET_THREAD_INFO(%rcx)
 	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
 	jnz rff_trace
-rff_action:	
+rff_action:
 	RESTORE_REST
 	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
 	je   int_ret_from_sys_call
@@ -267,7 +267,7 @@ rff_action:
 rff_trace:
 	movq %rsp,%rdi
 	call syscall_trace_leave
-	GET_THREAD_INFO(%rcx)	
+	GET_THREAD_INFO(%rcx)
 	jmp rff_action
 	CFI_ENDPROC
 END(ret_from_fork)
@@ -278,20 +278,20 @@ END(ret_from_fork)
  * SYSCALL does not save anything on the stack and does not change the
  * stack pointer.
  */
-		
+
 /*
- * Register setup:	
+ * Register setup:
  * rax  system call number
  * rdi  arg0
- * rcx  return address for syscall/sysret, C arg3 
+ * rcx  return address for syscall/sysret, C arg3
  * rsi  arg1
- * rdx  arg2	
+ * rdx  arg2
  * r10  arg3 	(--> moved to rcx for C)
  * r8   arg4
  * r9   arg5
  * r11  eflags for syscall/sysret, temporary for C
- * r12-r15,rbp,rbx saved by C code, not touched. 		
- * 
+ * r12-r15,rbp,rbx saved by C code, not touched.
+ *
  * Interrupts are off on entry.
  * Only called from user space.
  *
@@ -301,7 +301,7 @@ END(ret_from_fork)
  * When user can change the frames always force IRET. That is because
  * it deals with uncanonical addresses better. SYSRET has trouble
  * with them due to bugs in both AMD and Intel CPUs.
- */ 			 		
+ */
 
 ENTRY(system_call)
 	CFI_STARTPROC	simple
@@ -317,7 +317,7 @@ ENTRY(system_call)
 	 */
 ENTRY(system_call_after_swapgs)
 
-	movq	%rsp,%gs:pda_oldrsp 
+	movq	%rsp,%gs:pda_oldrsp
 	movq	%gs:pda_kernelstack,%rsp
 	/*
 	 * No need to follow this irqs off/on section - it's straight
@@ -325,7 +325,7 @@ ENTRY(system_call_after_swapgs)
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	SAVE_ARGS 8,1
-	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp) 
+	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
 	movq  %rcx,RIP-ARGOFFSET(%rsp)
 	CFI_REL_OFFSET rip,RIP-ARGOFFSET
 	GET_THREAD_INFO(%rcx)
@@ -339,19 +339,19 @@ system_call_fastpath:
 	movq %rax,RAX-ARGOFFSET(%rsp)
 /*
  * Syscall return path ending with SYSRET (fast path)
- * Has incomplete stack frame and undefined top of stack. 
- */		
+ * Has incomplete stack frame and undefined top of stack.
+ */
 ret_from_sys_call:
 	movl $_TIF_ALLWORK_MASK,%edi
 	/* edi:	flagmask */
-sysret_check:		
+sysret_check:
 	LOCKDEP_SYS_EXIT
 	GET_THREAD_INFO(%rcx)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	movl TI_flags(%rcx),%edx
 	andl %edi,%edx
-	jnz  sysret_careful 
+	jnz  sysret_careful
 	CFI_REMEMBER_STATE
 	/*
 	 * sysretq will re-enable interrupts:
@@ -366,7 +366,7 @@ sysret_check:
 
 	CFI_RESTORE_STATE
 	/* Handle reschedules */
-	/* edx:	work, edi: workmask */	
+	/* edx:	work, edi: workmask */
 sysret_careful:
 	bt $TIF_NEED_RESCHED,%edx
 	jnc sysret_signal
@@ -379,7 +379,7 @@ sysret_careful:
 	CFI_ADJUST_CFA_OFFSET -8
 	jmp sysret_check
 
-	/* Handle a signal */ 
+	/* Handle a signal */
 sysret_signal:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
@@ -398,7 +398,7 @@ sysret_signal:
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	jmp int_with_check
-	
+
 badsys:
 	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
 	jmp ret_from_sys_call
@@ -437,7 +437,7 @@ sysret_audit:
 #endif	/* CONFIG_AUDITSYSCALL */
 
 	/* Do syscall tracing */
-tracesys:			 
+tracesys:
 #ifdef CONFIG_AUDITSYSCALL
 	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
 	jz auditsys
@@ -460,8 +460,8 @@ tracesys:
 	call *sys_call_table(,%rax,8)
 	movq %rax,RAX-ARGOFFSET(%rsp)
 	/* Use IRET because user could have changed frame */
-		
-/* 
+
+/*
  * Syscall return path ending with IRET.
  * Has correct top of stack, but partial stack frame.
  */
@@ -505,18 +505,18 @@ int_very_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	SAVE_REST
-	/* Check for syscall exit trace */	
+	/* Check for syscall exit trace */
 	testl $_TIF_WORK_SYSCALL_EXIT,%edx
 	jz int_signal
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET 8
-	leaq 8(%rsp),%rdi	# &ptregs -> arg1	
+	leaq 8(%rsp),%rdi	# &ptregs -> arg1
 	call syscall_trace_leave
 	popq %rdi
 	CFI_ADJUST_CFA_OFFSET -8
 	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
 	jmp int_restore_rest
-	
+
 int_signal:
 	testl $_TIF_DO_NOTIFY_MASK,%edx
 	jz 1f
@@ -531,11 +531,11 @@ int_restore_rest:
 	jmp int_with_check
 	CFI_ENDPROC
 END(system_call)
-		
-/* 
+
+/*
  * Certain special system calls that need to save a complete full stack frame.
- */ 								
-	
+ */
+
 	.macro PTREGSCALL label,func,arg
 	.globl \label
 \label:
@@ -572,7 +572,7 @@ ENTRY(ptregscall_common)
 	ret
 	CFI_ENDPROC
 END(ptregscall_common)
-	
+
 ENTRY(stub_execve)
 	CFI_STARTPROC
 	popq %r11
@@ -588,11 +588,11 @@ ENTRY(stub_execve)
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_execve)
-	
+
 /*
  * sigreturn is special because it needs to restore all registers on return.
  * This cannot be done with SYSRET, so use the IRET return path instead.
- */                
+ */
 ENTRY(stub_rt_sigreturn)
 	CFI_STARTPROC
 	addq $8, %rsp
@@ -685,12 +685,12 @@ exit_intr:
 	GET_THREAD_INFO(%rcx)
 	testl $3,CS-ARGOFFSET(%rsp)
 	je retint_kernel
-	
+
 	/* Interrupt came from user space */
 	/*
 	 * Has a correct top of stack, but a partial stack frame
 	 * %rcx: thread info. Interrupts off.
-	 */		
+	 */
 retint_with_reschedule:
 	movl $_TIF_WORK_MASK,%edi
 retint_check:
@@ -763,20 +763,20 @@ retint_careful:
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET	8
 	call  schedule
-	popq %rdi		
+	popq %rdi
 	CFI_ADJUST_CFA_OFFSET	-8
 	GET_THREAD_INFO(%rcx)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	jmp retint_check
-	
+
 retint_signal:
 	testl $_TIF_DO_NOTIFY_MASK,%edx
 	jz    retint_swapgs
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	SAVE_REST
-	movq $-1,ORIG_RAX(%rsp) 			
+	movq $-1,ORIG_RAX(%rsp)
 	xorl %esi,%esi		# oldset
 	movq %rsp,%rdi		# &pt_regs
 	call do_notify_resume
@@ -798,14 +798,14 @@ ENTRY(retint_kernel)
 	jnc  retint_restore_args
 	call preempt_schedule_irq
 	jmp exit_intr
-#endif	
+#endif
 
 	CFI_ENDPROC
 END(common_interrupt)
-	
+
 /*
  * APIC interrupts.
- */		
+ */
 	.macro apicinterrupt num,func
 	INTR_FRAME
 	pushq $~(\num)
@@ -823,14 +823,14 @@ ENTRY(threshold_interrupt)
 	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
 END(threshold_interrupt)
 
-#ifdef CONFIG_SMP	
+#ifdef CONFIG_SMP
 ENTRY(reschedule_interrupt)
 	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
 END(reschedule_interrupt)
 
 	.macro INVALIDATE_ENTRY num
 ENTRY(invalidate_interrupt\num)
-	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt	
+	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
 END(invalidate_interrupt\num)
 	.endm
 
@@ -869,22 +869,22 @@ END(error_interrupt)
 ENTRY(spurious_interrupt)
 	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
 END(spurious_interrupt)
-				
+
 /*
  * Exception entry points.
- */ 		
+ */
 	.macro zeroentry sym
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	pushq $0	/* push error code/oldrax */ 
+	pushq $0	/* push error code/oldrax */
 	CFI_ADJUST_CFA_OFFSET 8
-	pushq %rax	/* push real oldrax to the rdi slot */ 
+	pushq %rax	/* push real oldrax to the rdi slot */
 	CFI_ADJUST_CFA_OFFSET 8
 	CFI_REL_OFFSET rax,0
 	leaq  \sym(%rip),%rax
 	jmp error_entry
 	CFI_ENDPROC
-	.endm	
+	.endm
 
 	.macro errorentry sym
 	XCPT_FRAME
@@ -998,13 +998,13 @@ paranoid_schedule\trace:
 
 /*
  * Exception entry point. This expects an error code/orig_rax on the stack
- * and the exception handler in %rax.	
- */ 		  				
+ * and the exception handler in %rax.
+ */
 KPROBE_ENTRY(error_entry)
 	_frame RDI
 	CFI_REL_OFFSET rax,0
 	/* rdi slot contains rax, oldrax contains error code */
-	cld	
+	cld
 	subq  $14*8,%rsp
 	CFI_ADJUST_CFA_OFFSET	(14*8)
 	movq %rsi,13*8(%rsp)
@@ -1015,7 +1015,7 @@ KPROBE_ENTRY(error_entry)
 	CFI_REL_OFFSET	rdx,RDX
 	movq %rcx,11*8(%rsp)
 	CFI_REL_OFFSET	rcx,RCX
-	movq %rsi,10*8(%rsp)	/* store rax */ 
+	movq %rsi,10*8(%rsp)	/* store rax */
 	CFI_REL_OFFSET	rax,RAX
 	movq %r8, 9*8(%rsp)
 	CFI_REL_OFFSET	r8,R8
@@ -1025,29 +1025,29 @@ KPROBE_ENTRY(error_entry)
 	CFI_REL_OFFSET	r10,R10
 	movq %r11,6*8(%rsp)
 	CFI_REL_OFFSET	r11,R11
-	movq %rbx,5*8(%rsp) 
+	movq %rbx,5*8(%rsp)
 	CFI_REL_OFFSET	rbx,RBX
-	movq %rbp,4*8(%rsp) 
+	movq %rbp,4*8(%rsp)
 	CFI_REL_OFFSET	rbp,RBP
-	movq %r12,3*8(%rsp) 
+	movq %r12,3*8(%rsp)
 	CFI_REL_OFFSET	r12,R12
-	movq %r13,2*8(%rsp) 
+	movq %r13,2*8(%rsp)
 	CFI_REL_OFFSET	r13,R13
-	movq %r14,1*8(%rsp) 
+	movq %r14,1*8(%rsp)
 	CFI_REL_OFFSET	r14,R14
-	movq %r15,(%rsp) 
+	movq %r15,(%rsp)
 	CFI_REL_OFFSET	r15,R15
-	xorl %ebx,%ebx	
+	xorl %ebx,%ebx
 	testl $3,CS(%rsp)
 	je  error_kernelspace
-error_swapgs:	
+error_swapgs:
 	SWAPGS
 error_sti:
 	TRACE_IRQS_OFF
-	movq %rdi,RDI(%rsp) 	
+	movq %rdi,RDI(%rsp)
 	CFI_REL_OFFSET	rdi,RDI
 	movq %rsp,%rdi
-	movq ORIG_RAX(%rsp),%rsi	/* get error code */ 
+	movq ORIG_RAX(%rsp),%rsi	/* get error code */
 	movq $-1,ORIG_RAX(%rsp)
 	call *%rax
 	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
@@ -1056,7 +1056,7 @@ error_exit:
 	RESTORE_REST
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	GET_THREAD_INFO(%rcx)	
+	GET_THREAD_INFO(%rcx)
 	testl %eax,%eax
 	jne  retint_kernel
 	LOCKDEP_SYS_EXIT_IRQ
@@ -1072,7 +1072,7 @@ error_kernelspace:
        /* There are two places in the kernel that can potentially fault with
           usergs. Handle them here. The exception handlers after
 	   iret run with kernel gs again, so don't set the user space flag.
-	   B stepping K8s sometimes report an truncated RIP for IRET 
+	   B stepping K8s sometimes report an truncated RIP for IRET
 	   exceptions returning to compat mode. Check for these here too. */
 	leaq irq_return(%rip),%rcx
 	cmpq %rcx,RIP(%rsp)
@@ -1084,17 +1084,17 @@ error_kernelspace:
         je   error_swapgs
 	jmp  error_sti
 KPROBE_END(error_entry)
-	
+
        /* Reload gs selector with exception handling */
-       /* edi:  new selector */ 
+       /* edi:  new selector */
 ENTRY(native_load_gs_index)
 	CFI_STARTPROC
 	pushf
 	CFI_ADJUST_CFA_OFFSET 8
 	DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
         SWAPGS
-gs_change:     
-        movl %edi,%gs   
+gs_change:
+        movl %edi,%gs
 2:	mfence		/* workaround */
 	SWAPGS
         popf
@@ -1102,20 +1102,20 @@ gs_change:
         ret
 	CFI_ENDPROC
 ENDPROC(native_load_gs_index)
-       
+
         .section __ex_table,"a"
         .align 8
         .quad gs_change,bad_gs
         .previous
         .section .fixup,"ax"
 	/* running with kernelgs */
-bad_gs: 
+bad_gs:
 	SWAPGS			/* switch back to user gs */
 	xorl %eax,%eax
         movl %eax,%gs
         jmp  2b
-        .previous       
-	
+        .previous
+
 /*
  * Create a kernel thread.
  *
@@ -1138,7 +1138,7 @@ ENTRY(kernel_thread)
 
 	xorl %r8d,%r8d
 	xorl %r9d,%r9d
-	
+
 	# clone now
 	call do_fork
 	movq %rax,RAX(%rsp)
@@ -1149,14 +1149,14 @@ ENTRY(kernel_thread)
 	 * so internally to the x86_64 port you can rely on kernel_thread()
 	 * not to reschedule the child before returning, this avoids the need
 	 * of hacks for example to fork off the per-CPU idle tasks.
-         * [Hopefully no generic code relies on the reschedule -AK]	
+         * [Hopefully no generic code relies on the reschedule -AK]
 	 */
 	RESTORE_ALL
 	UNFAKE_STACK_FRAME
 	ret
 	CFI_ENDPROC
 ENDPROC(kernel_thread)
-	
+
 child_rip:
 	pushq $0		# fake return address
 	CFI_STARTPROC
@@ -1191,10 +1191,10 @@ ENDPROC(child_rip)
 ENTRY(kernel_execve)
 	CFI_STARTPROC
 	FAKE_STACK_FRAME $0
-	SAVE_ALL	
+	SAVE_ALL
 	movq %rsp,%rcx
 	call sys_execve
-	movq %rax, RAX(%rsp)	
+	movq %rax, RAX(%rsp)
 	RESTORE_REST
 	testq %rax,%rax
 	je int_ret_from_sys_call
@@ -1213,7 +1213,7 @@ ENTRY(coprocessor_error)
 END(coprocessor_error)
 
 ENTRY(simd_coprocessor_error)
-	zeroentry do_simd_coprocessor_error	
+	zeroentry do_simd_coprocessor_error
 END(simd_coprocessor_error)
 
 ENTRY(device_not_available)
@@ -1225,12 +1225,12 @@ KPROBE_ENTRY(debug)
  	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	pushq $0
-	CFI_ADJUST_CFA_OFFSET 8		
+	CFI_ADJUST_CFA_OFFSET 8
 	paranoidentry do_debug, DEBUG_STACK
 	paranoidexit
 KPROBE_END(debug)
 
-	/* runs on exception stack */	
+	/* runs on exception stack */
 KPROBE_ENTRY(nmi)
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
@@ -1264,7 +1264,7 @@ ENTRY(bounds)
 END(bounds)
 
 ENTRY(invalid_op)
-	zeroentry do_invalid_op	
+	zeroentry do_invalid_op
 END(invalid_op)
 
 ENTRY(coprocessor_segment_overrun)
@@ -1319,7 +1319,7 @@ ENTRY(machine_check)
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	pushq $0
-	CFI_ADJUST_CFA_OFFSET 8	
+	CFI_ADJUST_CFA_OFFSET 8
 	paranoidentry do_machine_check
 	jmp paranoid_exit1
 	CFI_ENDPROC
-- 
cgit v1.2.3-70-g09d2


From 569712b2b0970fa5b19673544d62ae661d04a220 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sun, 16 Nov 2008 03:12:49 -0800
Subject: x86: fix wakeup_cpu with numaq/es7000, v2

Impact: fix secondary-CPU wakeup/init path with numaq and es7000

While looking at wakeup_secondary_cpu for WAKE_SECONDARY_VIA_NMI:

|#ifdef WAKE_SECONDARY_VIA_NMI
|/*
| * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
| * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
| * won't ... remember to clear down the APIC, etc later.
| */
|static int __devinit
|wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
|{
|        unsigned long send_status, accept_status = 0;
|        int maxlvt;
|...
|        if (APIC_INTEGRATED(apic_version[phys_apicid])) {
|                maxlvt = lapic_get_maxlvt();

I noticed that there is no warning about undefined phys_apicid...

because WAKE_SECONDARY_VIA_NMI and WAKE_SECONDARY_VIA_INIT can not be
defined at the same time. So NUMAQ is using wrong wakeup_secondary_cpu.

WAKE_SECONDARY_VIA_NMI, WAKE_SECONDARY_VIA_INIT and
WAKE_SECONDARY_VIA_MIP are variants of a weird and fragile
preprocessor-driven "HAL" mechanisms to specify the kind of secondary-CPU
wakeup strategy a given x86 kernel will use.

The vast majority of systems want to use INIT for secondary wakeup - NUMAQ
uses an NMI, (old-style-) ES7000 uses 'MIP' (a firmware driven in-memory
flag to let secondaries continue).

So convert these mechanisms to x86_quirks and add a
->wakeup_secondary_cpu() method to specify the rare exception
to the sane default.

Extend genapic accordingly as well, for 32-bit.

While looking further, I noticed that functions in wakecup.h for numaq
and es7000 are different to the default in mach_wakecpu.h - but smpboot.c
will only use default mach_wakecpu.h with smphook.h.

So we need to add mach_wakecpu.h for mach_generic, to properly support
numaq and es7000, and vectorize the following SMP init methods:

	int trampoline_phys_low;
	int trampoline_phys_high;
	void (*wait_for_init_deassert)(atomic_t *deassert);
	void (*smp_callin_clear_local_apic)(void);
	void (*store_NMI_vector)(unsigned short *high, unsigned short *low);
	void (*restore_NMI_vector)(unsigned short *high, unsigned short *low);
	void (*inquire_remote_apic)(int apicid);

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/bigsmp/apic.h                |  2 --
 arch/x86/include/asm/es7000/apic.h                |  3 --
 arch/x86/include/asm/es7000/wakecpu.h             | 41 ++++++---------------
 arch/x86/include/asm/genapic_32.h                 | 17 ++++++++-
 arch/x86/include/asm/mach-default/mach_wakecpu.h  | 24 +++++--------
 arch/x86/include/asm/mach-default/smpboot_hooks.h |  8 +++--
 arch/x86/include/asm/mach-generic/mach_wakecpu.h  | 12 +++++++
 arch/x86/include/asm/numaq/wakecpu.h              | 24 +++++++------
 arch/x86/include/asm/setup.h                      |  2 ++
 arch/x86/kernel/es7000_32.c                       | 44 ++++++++++++-----------
 arch/x86/kernel/numaq_32.c                        |  1 +
 arch/x86/kernel/smpboot.c                         | 24 ++++++++-----
 arch/x86/mach-generic/bigsmp.c                    |  1 +
 arch/x86/mach-generic/default.c                   |  1 +
 arch/x86/mach-generic/summit.c                    |  1 +
 15 files changed, 110 insertions(+), 95 deletions(-)
 create mode 100644 arch/x86/include/asm/mach-generic/mach_wakecpu.h

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index 1d9543b9d358..ce547f24a1cd 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -24,8 +24,6 @@ static inline cpumask_t target_cpus(void)
 #define INT_DELIVERY_MODE	(dest_Fixed)
 #define INT_DEST_MODE		(0)    /* phys delivery to target proc */
 #define NO_BALANCE_IRQ		(0)
-#define WAKE_SECONDARY_VIA_INIT
-
 
 static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
 {
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index 380f0b4f17ed..9d8cf776c285 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -23,8 +23,6 @@ static inline cpumask_t target_cpus(void)
 #define INT_DELIVERY_MODE	(dest_LowestPrio)
 #define INT_DEST_MODE		(1)    /* logical delivery broadcast to all procs */
 #define NO_BALANCE_IRQ		(1)
-#undef  WAKE_SECONDARY_VIA_INIT
-#define WAKE_SECONDARY_VIA_MIP
 #else
 #define APIC_DFR_VALUE		(APIC_DFR_FLAT)
 #define INT_DELIVERY_MODE	(dest_Fixed)
@@ -32,7 +30,6 @@ static inline cpumask_t target_cpus(void)
 #define NO_BALANCE_IRQ		(0)
 #undef  APIC_DEST_LOGICAL
 #define APIC_DEST_LOGICAL	0x0
-#define WAKE_SECONDARY_VIA_INIT
 #endif
 
 static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h
index 398493461913..78f0daaee436 100644
--- a/arch/x86/include/asm/es7000/wakecpu.h
+++ b/arch/x86/include/asm/es7000/wakecpu.h
@@ -1,36 +1,12 @@
 #ifndef __ASM_ES7000_WAKECPU_H
 #define __ASM_ES7000_WAKECPU_H
 
-/*
- * This file copes with machines that wakeup secondary CPUs by the
- * INIT, INIT, STARTUP sequence.
- */
-
-#ifdef CONFIG_ES7000_CLUSTERED_APIC
-#define WAKE_SECONDARY_VIA_MIP
-#else
-#define WAKE_SECONDARY_VIA_INIT
-#endif
-
-#ifdef WAKE_SECONDARY_VIA_MIP
-extern int es7000_start_cpu(int cpu, unsigned long eip);
-static inline int
-wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
-{
-	int boot_error = 0;
-	boot_error = es7000_start_cpu(phys_apicid, start_eip);
-	return boot_error;
-}
-#endif
-
-#define TRAMPOLINE_LOW phys_to_virt(0x467)
-#define TRAMPOLINE_HIGH phys_to_virt(0x469)
-
-#define boot_cpu_apicid boot_cpu_physical_apicid
+#define TRAMPOLINE_PHYS_LOW	0x467
+#define TRAMPOLINE_PHYS_HIGH	0x469
 
 static inline void wait_for_init_deassert(atomic_t *deassert)
 {
-#ifdef WAKE_SECONDARY_VIA_INIT
+#ifndef CONFIG_ES7000_CLUSTERED_APIC
 	while (!atomic_read(deassert))
 		cpu_relax();
 #endif
@@ -50,9 +26,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
 {
 }
 
-#define inquire_remote_apic(apicid) do {		\
-		if (apic_verbosity >= APIC_DEBUG)	\
-			__inquire_remote_apic(apicid);	\
-	} while (0)
+extern void __inquire_remote_apic(int apicid);
+
+static inline void inquire_remote_apic(int apicid)
+{
+	if (apic_verbosity >= APIC_DEBUG)
+		__inquire_remote_apic(apicid);
+}
 
 #endif /* __ASM_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 5cbd4fcc06fd..39bd8c1db3f5 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -2,6 +2,7 @@
 #define _ASM_X86_GENAPIC_32_H
 
 #include <asm/mpspec.h>
+#include <asm/atomic.h>
 
 /*
  * Generic APIC driver interface.
@@ -65,6 +66,13 @@ struct genapic {
 	void (*send_IPI_allbutself)(int vector);
 	void (*send_IPI_all)(int vector);
 #endif
+	int trampoline_phys_low;
+	int trampoline_phys_high;
+	void (*wait_for_init_deassert)(atomic_t *deassert);
+	void (*smp_callin_clear_local_apic)(void);
+	void (*store_NMI_vector)(unsigned short *high, unsigned short *low);
+	void (*restore_NMI_vector)(unsigned short *high, unsigned short *low);
+	void (*inquire_remote_apic)(int apicid);
 };
 
 #define APICFUNC(x) .x = x,
@@ -105,13 +113,20 @@ struct genapic {
 	APICFUNC(get_apic_id)				\
 	.apic_id_mask = APIC_ID_MASK,			\
 	APICFUNC(cpu_mask_to_apicid)			\
-	APICFUNC(vector_allocation_domain)			\
+	APICFUNC(vector_allocation_domain)		\
 	APICFUNC(acpi_madt_oem_check)			\
 	IPIFUNC(send_IPI_mask)				\
 	IPIFUNC(send_IPI_allbutself)			\
 	IPIFUNC(send_IPI_all)				\
 	APICFUNC(enable_apic_mode)			\
 	APICFUNC(phys_pkg_id)				\
+	.trampoline_phys_low = TRAMPOLINE_PHYS_LOW,		\
+	.trampoline_phys_high = TRAMPOLINE_PHYS_HIGH,		\
+	APICFUNC(wait_for_init_deassert)		\
+	APICFUNC(smp_callin_clear_local_apic)		\
+	APICFUNC(store_NMI_vector)			\
+	APICFUNC(restore_NMI_vector)			\
+	APICFUNC(inquire_remote_apic)			\
 }
 
 extern struct genapic *genapic;
diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h
index 9d80db91e992..ceb013660146 100644
--- a/arch/x86/include/asm/mach-default/mach_wakecpu.h
+++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h
@@ -1,17 +1,8 @@
 #ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H
 #define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H
 
-/* 
- * This file copes with machines that wakeup secondary CPUs by the
- * INIT, INIT, STARTUP sequence.
- */
-
-#define WAKE_SECONDARY_VIA_INIT
-
-#define TRAMPOLINE_LOW phys_to_virt(0x467)
-#define TRAMPOLINE_HIGH phys_to_virt(0x469)
-
-#define boot_cpu_apicid boot_cpu_physical_apicid
+#define TRAMPOLINE_PHYS_LOW (0x467)
+#define TRAMPOLINE_PHYS_HIGH (0x469)
 
 static inline void wait_for_init_deassert(atomic_t *deassert)
 {
@@ -33,9 +24,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
 {
 }
 
-#define inquire_remote_apic(apicid) do {		\
-		if (apic_verbosity >= APIC_DEBUG)	\
-			__inquire_remote_apic(apicid);	\
-	} while (0)
+extern void __inquire_remote_apic(int apicid);
+
+static inline void inquire_remote_apic(int apicid)
+{
+	if (apic_verbosity >= APIC_DEBUG)
+		__inquire_remote_apic(apicid);
+}
 
 #endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/mach-default/smpboot_hooks.h b/arch/x86/include/asm/mach-default/smpboot_hooks.h
index dbab36d64d48..23bf52103b89 100644
--- a/arch/x86/include/asm/mach-default/smpboot_hooks.h
+++ b/arch/x86/include/asm/mach-default/smpboot_hooks.h
@@ -13,9 +13,11 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
 	CMOS_WRITE(0xa, 0xf);
 	local_flush_tlb();
 	pr_debug("1.\n");
-	*((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
+	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
+								 start_eip >> 4;
 	pr_debug("2.\n");
-	*((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
+	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
+							 start_eip & 0xf;
 	pr_debug("3.\n");
 }
 
@@ -32,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
 	 */
 	CMOS_WRITE(0, 0xf);
 
-	*((volatile long *) phys_to_virt(0x467)) = 0;
+	*((volatile long *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
 }
 
 static inline void __init smpboot_setup_io_apic(void)
diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h
new file mode 100644
index 000000000000..1ab16b168c8a
--- /dev/null
+++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H
+#define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H
+
+#define TRAMPOLINE_PHYS_LOW (genapic->trampoline_phys_low)
+#define TRAMPOLINE_PHYS_HIGH (genapic->trampoline_phys_high)
+#define wait_for_init_deassert (genapic->wait_for_init_deassert)
+#define smp_callin_clear_local_apic (genapic->smp_callin_clear_local_apic)
+#define store_NMI_vector (genapic->store_NMI_vector)
+#define restore_NMI_vector (genapic->restore_NMI_vector)
+#define inquire_remote_apic (genapic->inquire_remote_apic)
+
+#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */
diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h
index c577bda5b1c5..6f499df8eddb 100644
--- a/arch/x86/include/asm/numaq/wakecpu.h
+++ b/arch/x86/include/asm/numaq/wakecpu.h
@@ -3,12 +3,8 @@
 
 /* This file copes with machines that wakeup secondary CPUs by NMIs */
 
-#define WAKE_SECONDARY_VIA_NMI
-
-#define TRAMPOLINE_LOW phys_to_virt(0x8)
-#define TRAMPOLINE_HIGH phys_to_virt(0xa)
-
-#define boot_cpu_apicid boot_cpu_logical_apicid
+#define TRAMPOLINE_PHYS_LOW (0x8)
+#define TRAMPOLINE_PHYS_HIGH (0xa)
 
 /* We don't do anything here because we use NMI's to boot instead */
 static inline void wait_for_init_deassert(atomic_t *deassert)
@@ -27,17 +23,23 @@ static inline void smp_callin_clear_local_apic(void)
 static inline void store_NMI_vector(unsigned short *high, unsigned short *low)
 {
 	printk("Storing NMI vector\n");
-	*high = *((volatile unsigned short *) TRAMPOLINE_HIGH);
-	*low = *((volatile unsigned short *) TRAMPOLINE_LOW);
+	*high =
+	  *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH));
+	*low =
+	  *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW));
 }
 
 static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
 {
 	printk("Restoring NMI vector\n");
-	*((volatile unsigned short *) TRAMPOLINE_HIGH) = *high;
-	*((volatile unsigned short *) TRAMPOLINE_LOW) = *low;
+	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
+								 *high;
+	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
+								 *low;
 }
 
-#define inquire_remote_apic(apicid) {}
+static inline void inquire_remote_apic(int apicid)
+{
+}
 
 #endif /* __ASM_NUMAQ_WAKECPU_H */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index f12d37237465..40b2d3304911 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -16,6 +16,7 @@ static inline void visws_early_detect(void) { }
 static inline int is_visws_box(void) { return 0; }
 #endif
 
+extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
 /*
  * Any setup quirks to be performed?
  */
@@ -39,6 +40,7 @@ struct x86_quirks {
 	void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
                                     unsigned short oemsize);
 	int (*setup_ioapic_ids)(void);
+	int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
 };
 
 extern struct x86_quirks *x86_quirks;
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
index f454c78fcef6..bed10dddf099 100644
--- a/arch/x86/kernel/es7000_32.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -40,6 +40,7 @@
 #include <asm/smp.h>
 #include <asm/apicdef.h>
 #include <mach_mpparse.h>
+#include <asm/setup.h>
 
 /*
  * ES7000 chipsets
@@ -161,6 +162,26 @@ es7000_rename_gsi(int ioapic, int gsi)
 	return gsi;
 }
 
+#ifdef CONFIG_ES7000_CLUSTERED_APIC
+static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
+{
+	unsigned long vect = 0, psaival = 0;
+
+	if (psai == NULL)
+		return -1;
+
+	vect = ((unsigned long)__pa(eip)/0x1000) << 16;
+	psaival = (0x1000000 | vect | cpu);
+
+	while (*psai & 0x1000000)
+		;
+
+	*psai = psaival;
+
+	return 0;
+}
+#endif
+
 void __init
 setup_unisys(void)
 {
@@ -176,6 +197,9 @@ setup_unisys(void)
 	else
 		es7000_plat = ES7000_CLASSIC;
 	ioapic_renumber_irq = es7000_rename_gsi;
+#ifdef CONFIG_ES7000_CLUSTERED_APIC
+	x86_quirks->wakeup_secondary_cpu = wakeup_secondary_cpu_via_mip;
+#endif
 }
 
 /*
@@ -324,26 +348,6 @@ es7000_mip_write(struct mip_reg *mip_reg)
 	return status;
 }
 
-int
-es7000_start_cpu(int cpu, unsigned long eip)
-{
-	unsigned long vect = 0, psaival = 0;
-
-	if (psai == NULL)
-		return -1;
-
-	vect = ((unsigned long)__pa(eip)/0x1000) << 16;
-	psaival = (0x1000000 | vect | cpu);
-
-	while (*psai & 0x1000000)
-                ;
-
-	*psai = psaival;
-
-	return 0;
-
-}
-
 void __init
 es7000_sw_apic(void)
 {
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 4caff39078e0..745891b7d0fb 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -250,6 +250,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
 	.mpc_oem_pci_bus	= mpc_oem_pci_bus,
 	.smp_read_mpc_oem	= smp_read_mpc_oem,
 	.setup_ioapic_ids	= numaq_setup_ioapic_ids,
+	.wakeup_secondary_cpu	= wakeup_secondary_cpu_via_nmi,
 };
 
 void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7b1093397319..498c1ef37fe0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,6 +62,7 @@
 #include <asm/mtrr.h>
 #include <asm/vmi.h>
 #include <asm/genapic.h>
+#include <asm/setup.h>
 #include <linux/mc146818rtc.h>
 
 #include <mach_apic.h>
@@ -536,7 +537,7 @@ static void impress_friends(void)
 	pr_debug("Before bogocount - setting activated=1.\n");
 }
 
-static inline void __inquire_remote_apic(int apicid)
+void __inquire_remote_apic(int apicid)
 {
 	unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
 	char *names[] = { "ID", "VERSION", "SPIV" };
@@ -575,14 +576,13 @@ static inline void __inquire_remote_apic(int apicid)
 	}
 }
 
-#ifdef WAKE_SECONDARY_VIA_NMI
 /*
  * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
  * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
  * won't ... remember to clear down the APIC, etc later.
  */
-static int __devinit
-wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
+int __devinit
+wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
 {
 	unsigned long send_status, accept_status = 0;
 	int maxlvt;
@@ -599,7 +599,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
 	 * Give the other CPU some time to accept the IPI.
 	 */
 	udelay(200);
-	if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
 		maxlvt = lapic_get_maxlvt();
 		if (maxlvt > 3)			/* Due to the Pentium erratum 3AP.  */
 			apic_write(APIC_ESR, 0);
@@ -614,11 +614,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
 
 	return (send_status | accept_status);
 }
-#endif	/* WAKE_SECONDARY_VIA_NMI */
 
-#ifdef WAKE_SECONDARY_VIA_INIT
 static int __devinit
-wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
+wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 {
 	unsigned long send_status, accept_status = 0;
 	int maxlvt, num_starts, j;
@@ -737,7 +735,15 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
 
 	return (send_status | accept_status);
 }
-#endif	/* WAKE_SECONDARY_VIA_INIT */
+
+static int __devinit
+wakeup_secondary_cpu(int apicid, unsigned long start_eip)
+{
+	if (x86_quirks->wakeup_secondary_cpu)
+		return x86_quirks->wakeup_secondary_cpu(apicid, start_eip);
+
+	return wakeup_secondary_cpu_via_init(apicid, start_eip);
+}
 
 struct create_idle {
 	struct work_struct work;
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 3c3b471ea496..3624a364b7f3 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -17,6 +17,7 @@
 #include <asm/bigsmp/apic.h>
 #include <asm/bigsmp/ipi.h>
 #include <asm/mach-default/mach_mpparse.h>
+#include <asm/mach-default/mach_wakecpu.h>
 
 static int dmi_bigsmp; /* can be set by dmi scanners */
 
diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c
index 9e835a11a13a..e63a4a76d8cd 100644
--- a/arch/x86/mach-generic/default.c
+++ b/arch/x86/mach-generic/default.c
@@ -16,6 +16,7 @@
 #include <asm/mach-default/mach_apic.h>
 #include <asm/mach-default/mach_ipi.h>
 #include <asm/mach-default/mach_mpparse.h>
+#include <asm/mach-default/mach_wakecpu.h>
 
 /* should be called last. */
 static int probe_default(void)
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 6272b5e69da6..2c6d234e0009 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -16,6 +16,7 @@
 #include <asm/summit/apic.h>
 #include <asm/summit/ipi.h>
 #include <asm/summit/mpparse.h>
+#include <asm/mach-default/mach_wakecpu.h>
 
 static int probe_summit(void)
 {
-- 
cgit v1.2.3-70-g09d2


From 54ac14a8e982ae6c7ac71ee2b0d0173b974509e2 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 17 Nov 2008 15:19:53 -0800
Subject: x86: fix wakeup_cpu with numaq/es7000, v2, fix

Impact: fix wakeup_secondary_cpu with hotplug

We can not put that into x86_quirks, because that is __initdata.
So try to move that to genapic, and add update_genapic in x86_quirks.

later we even could use that stub to:

 1. autodetect CONFIG_ES7000_CLUSTERED_APIC
 2. more correct inquire_remote_apic with apic_verbosity setting.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/genapic_32.h             |  1 +
 arch/x86/include/asm/genapic_64.h             |  2 ++
 arch/x86/include/asm/mach-default/mach_apic.h |  2 ++
 arch/x86/include/asm/mach-generic/mach_apic.h |  1 +
 arch/x86/include/asm/setup.h                  |  3 ++-
 arch/x86/kernel/es7000_32.c                   | 11 ++++++++++-
 arch/x86/kernel/genapic_64.c                  |  4 ++++
 arch/x86/kernel/numaq_32.c                    | 11 +++++++++--
 arch/x86/kernel/setup.c                       | 13 ++++++++++++-
 arch/x86/kernel/smpboot.c                     | 11 +----------
 arch/x86/mach-generic/probe.c                 |  4 ++++
 11 files changed, 48 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 39bd8c1db3f5..455d6c27a98b 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -66,6 +66,7 @@ struct genapic {
 	void (*send_IPI_allbutself)(int vector);
 	void (*send_IPI_all)(int vector);
 #endif
+	int (*wakeup_cpu)(int apicid, unsigned long start_eip);
 	int trampoline_phys_low;
 	int trampoline_phys_high;
 	void (*wait_for_init_deassert)(atomic_t *deassert);
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index 13c4e96199ea..2cae011668b7 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -32,6 +32,8 @@ struct genapic {
 	unsigned int (*get_apic_id)(unsigned long x);
 	unsigned long (*set_apic_id)(unsigned int id);
 	unsigned long apic_id_mask;
+	/* wakeup_secondary_cpu */
+	int (*wakeup_cpu)(int apicid, unsigned long start_eip);
 };
 
 extern struct genapic *genapic;
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index ff3a6c236c00..6cb3a467e067 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -32,11 +32,13 @@ static inline cpumask_t target_cpus(void)
 #define vector_allocation_domain    (genapic->vector_allocation_domain)
 #define read_apic_id()  (GET_APIC_ID(apic_read(APIC_ID)))
 #define send_IPI_self (genapic->send_IPI_self)
+#define wakeup_secondary_cpu (genapic->wakeup_cpu)
 extern void setup_apic_routing(void);
 #else
 #define INT_DELIVERY_MODE dest_LowestPrio
 #define INT_DEST_MODE 1     /* logical delivery broadcast to all procs */
 #define TARGET_CPUS (target_cpus())
+#define wakeup_secondary_cpu wakeup_secondary_cpu_via_init
 /*
  * Set up the logical destination ID.
  *
diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h
index 5180bd7478fb..e430f47df667 100644
--- a/arch/x86/include/asm/mach-generic/mach_apic.h
+++ b/arch/x86/include/asm/mach-generic/mach_apic.h
@@ -27,6 +27,7 @@
 #define vector_allocation_domain (genapic->vector_allocation_domain)
 #define enable_apic_mode (genapic->enable_apic_mode)
 #define phys_pkg_id (genapic->phys_pkg_id)
+#define wakeup_secondary_cpu (genapic->wakeup_cpu)
 
 extern void generic_bigsmp_probe(void);
 
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 40b2d3304911..294daeb3a006 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -17,6 +17,7 @@ static inline int is_visws_box(void) { return 0; }
 #endif
 
 extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
+extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
 /*
  * Any setup quirks to be performed?
  */
@@ -40,7 +41,7 @@ struct x86_quirks {
 	void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
                                     unsigned short oemsize);
 	int (*setup_ioapic_ids)(void);
-	int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
+	int (*update_genapic)(void);
 };
 
 extern struct x86_quirks *x86_quirks;
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
index bed10dddf099..fb3bfe66fbe2 100644
--- a/arch/x86/kernel/es7000_32.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -40,6 +40,7 @@
 #include <asm/smp.h>
 #include <asm/apicdef.h>
 #include <mach_mpparse.h>
+#include <asm/genapic.h>
 #include <asm/setup.h>
 
 /*
@@ -180,6 +181,13 @@ static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
 
 	return 0;
 }
+
+static int __init es7000_update_genapic(void)
+{
+	genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
+
+	return 0;
+}
 #endif
 
 void __init
@@ -197,8 +205,9 @@ setup_unisys(void)
 	else
 		es7000_plat = ES7000_CLASSIC;
 	ioapic_renumber_irq = es7000_rename_gsi;
+
 #ifdef CONFIG_ES7000_CLUSTERED_APIC
-	x86_quirks->wakeup_secondary_cpu = wakeup_secondary_cpu_via_mip;
+	x86_quirks->update_genapic = es7000_update_genapic;
 #endif
 }
 
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 6c9bfc9e1e95..2bced78b0b8e 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -21,6 +21,7 @@
 #include <asm/smp.h>
 #include <asm/ipi.h>
 #include <asm/genapic.h>
+#include <asm/setup.h>
 
 extern struct genapic apic_flat;
 extern struct genapic apic_physflat;
@@ -53,6 +54,9 @@ void __init setup_apic_routing(void)
 			genapic = &apic_physflat;
 		printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
 	}
+
+	if (x86_quirks->update_genapic)
+		x86_quirks->update_genapic();
 }
 
 /* Same for both flat and physical. */
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 745891b7d0fb..0deea37a53cf 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -31,7 +31,7 @@
 #include <asm/numaq.h>
 #include <asm/topology.h>
 #include <asm/processor.h>
-#include <asm/mpspec.h>
+#include <asm/genapic.h>
 #include <asm/e820.h>
 #include <asm/setup.h>
 
@@ -235,6 +235,13 @@ static int __init numaq_setup_ioapic_ids(void)
 	return 1;
 }
 
+static int __init numaq_update_genapic(void)
+{
+	genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
+
+	return 0;
+}
+
 static struct x86_quirks numaq_x86_quirks __initdata = {
 	.arch_pre_time_init	= numaq_pre_time_init,
 	.arch_time_init		= NULL,
@@ -250,7 +257,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
 	.mpc_oem_pci_bus	= mpc_oem_pci_bus,
 	.smp_read_mpc_oem	= smp_read_mpc_oem,
 	.setup_ioapic_ids	= numaq_setup_ioapic_ids,
-	.wakeup_secondary_cpu	= wakeup_secondary_cpu_via_nmi,
+	.update_genapic		= numaq_update_genapic,
 };
 
 void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0fa6790c1dd3..c366e891e10b 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -583,7 +583,18 @@ static int __init setup_elfcorehdr(char *arg)
 early_param("elfcorehdr", setup_elfcorehdr);
 #endif
 
-static struct x86_quirks default_x86_quirks __initdata;
+static int __init default_update_genapic(void)
+{
+#if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64)
+	genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
+#endif
+
+	return 0;
+}
+
+static struct x86_quirks default_x86_quirks __initdata = {
+	.update_genapic         = default_update_genapic,
+};
 
 struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 498c1ef37fe0..0e9f446269f4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -615,7 +615,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
 	return (send_status | accept_status);
 }
 
-static int __devinit
+int __devinit
 wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 {
 	unsigned long send_status, accept_status = 0;
@@ -736,15 +736,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 	return (send_status | accept_status);
 }
 
-static int __devinit
-wakeup_secondary_cpu(int apicid, unsigned long start_eip)
-{
-	if (x86_quirks->wakeup_secondary_cpu)
-		return x86_quirks->wakeup_secondary_cpu(apicid, start_eip);
-
-	return wakeup_secondary_cpu_via_init(apicid, start_eip);
-}
-
 struct create_idle {
 	struct work_struct work;
 	struct task_struct *idle;
diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c
index 5a7e4619e1c4..90b134f3cd74 100644
--- a/arch/x86/mach-generic/probe.c
+++ b/arch/x86/mach-generic/probe.c
@@ -15,6 +15,7 @@
 #include <asm/mpspec.h>
 #include <asm/apicdef.h>
 #include <asm/genapic.h>
+#include <asm/setup.h>
 
 extern struct genapic apic_numaq;
 extern struct genapic apic_summit;
@@ -57,6 +58,9 @@ static int __init parse_apic(char *arg)
 		}
 	}
 
+	if (x86_quirks->update_genapic)
+		x86_quirks->update_genapic();
+
 	/* Parsed again by __setup for debug/verbose */
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From a1afd01c175324656d0e8f1c82ea94b474953c04 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 18 Nov 2008 12:44:21 +0100
Subject: x86: default to SWIOTLB=y on x86_64

Impact: fixes korg bugzilla 11980

A kernel for a 64bit x86 system should always contain the swiotlb code
in case it is booted on a machine without any hardware IOMMU supported
by the kernel and more than 4GB of RAM. This patch changes Kconfig to
always compile swiotlb into the kernel for x86_64.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: stable@kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 93224b569187..669c6d588bde 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -566,7 +566,7 @@ config AMD_IOMMU
 
 # need this always selected by IOMMU for the VIA workaround
 config SWIOTLB
-	bool
+	def_bool y if X86_64
 	help
 	  Support for software bounce buffers used on x86-64 systems
 	  which don't have a hardware IOMMU (e.g. the current generation
-- 
cgit v1.2.3-70-g09d2


From b78a5b5260abf90d574911e7c7b8d35d5b48d6c0 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 17 Nov 2008 15:44:50 -0800
Subject: x86: ia32_signal: cleanup macro COPY

Impact: cleanup

No need to use temporary variable in this case.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index e2d0bc779bf7..610a17774ea2 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -197,10 +197,8 @@ struct rt_sigframe
 	/* fp state follows here */
 };
 
-#define COPY(x)		{ 		\
-	unsigned int reg;		\
-	err |= __get_user(reg, &sc->x);	\
-	regs->x = reg;			\
+#define COPY(x)			{		\
+	err |= __get_user(regs->x, &sc->x);	\
 }
 
 #define RELOAD_SEG(seg,mask)						\
-- 
cgit v1.2.3-70-g09d2


From d71a68dca54756049e0eae62458a1705bf680d09 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 17 Nov 2008 15:47:06 -0800
Subject: x86: ia32_signal: introduce COPY_SEG_CPL3

Impact: cleanup

Introduce COPY_SEG_CPL3 for ia32_restore_sigcontext().

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 610a17774ea2..fe44c314c9c0 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -201,6 +201,12 @@ struct rt_sigframe
 	err |= __get_user(regs->x, &sc->x);	\
 }
 
+#define COPY_SEG_CPL3(seg)	{			\
+		unsigned short tmp;			\
+		err |= __get_user(tmp, &sc->seg);	\
+		regs->seg = tmp | 3;			\
+}
+
 #define RELOAD_SEG(seg,mask)						\
 	{ unsigned int cur;						\
 	  unsigned short pre;						\
@@ -246,10 +252,8 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 	COPY(dx); COPY(cx); COPY(ip);
 	/* Don't touch extended registers */
 
-	err |= __get_user(regs->cs, &sc->cs);
-	regs->cs |= 3;
-	err |= __get_user(regs->ss, &sc->ss);
-	regs->ss |= 3;
+	COPY_SEG_CPL3(cs);
+	COPY_SEG_CPL3(ss);
 
 	err |= __get_user(tmpflags, &sc->flags);
 	regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
-- 
cgit v1.2.3-70-g09d2


From 8c6e5ce0fd67c57ad5e19d1718e1250214e855db Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 17 Nov 2008 15:47:48 -0800
Subject: x86: ia32_signal: cleanup macro RELOAD_SEG

Impact: cleanup

Remove mask parameter because it's always 3.
Cleanup coding styles.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Reviewed-by: WANG Cong <wangcong@zeuux.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index fe44c314c9c0..2c56e6857d1a 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -207,13 +207,14 @@ struct rt_sigframe
 		regs->seg = tmp | 3;			\
 }
 
-#define RELOAD_SEG(seg,mask)						\
-	{ unsigned int cur;						\
-	  unsigned short pre;						\
-	  err |= __get_user(pre, &sc->seg);				\
-	  savesegment(seg, cur);					\
-	  pre |= mask;							\
-	  if (pre != cur) loadsegment(seg, pre); }
+#define RELOAD_SEG(seg)		{		\
+	unsigned int cur, pre;			\
+	err |= __get_user(pre, &sc->seg);	\
+	savesegment(seg, cur);			\
+	pre |= 3;				\
+	if (pre != cur)				\
+		loadsegment(seg, pre);		\
+}
 
 static int ia32_restore_sigcontext(struct pt_regs *regs,
 				   struct sigcontext_ia32 __user *sc,
@@ -244,9 +245,9 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 	if (gs != oldgs)
 		load_gs_index(gs);
 
-	RELOAD_SEG(fs, 3);
-	RELOAD_SEG(ds, 3);
-	RELOAD_SEG(es, 3);
+	RELOAD_SEG(fs);
+	RELOAD_SEG(ds);
+	RELOAD_SEG(es);
 
 	COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
 	COPY(dx); COPY(cx); COPY(ip);
-- 
cgit v1.2.3-70-g09d2


From 047ce93581ca122442ed3c13a62a645249a7db1d Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 17 Nov 2008 15:48:27 -0800
Subject: x86: ia32_signal: remove using temporary variable

Impact: cleanup

No need to use temporary variable.
Also rename the variable same as arch/x86/kernel/signal_32.c.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Reviewed-by: WANG Cong <wangcong@zeuux.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 2c56e6857d1a..e591e381611b 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -218,7 +218,7 @@ struct rt_sigframe
 
 static int ia32_restore_sigcontext(struct pt_regs *regs,
 				   struct sigcontext_ia32 __user *sc,
-				   unsigned int *peax)
+				   unsigned int *pax)
 {
 	unsigned int tmpflags, gs, oldgs, err = 0;
 	void __user *buf;
@@ -265,9 +265,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 	buf = compat_ptr(tmp);
 	err |= restore_i387_xstate_ia32(buf);
 
-	err |= __get_user(tmp, &sc->ax);
-	*peax = tmp;
-
+	err |= __get_user(*pax, &sc->ax);
 	return err;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 64977609e316c86fad513d9bf0afff998581e59d Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 17 Nov 2008 15:49:14 -0800
Subject: x86: ia32_signal: change order of storing in setup_sigcontext()

Impact: cleanup

Change order of storing to match the sigcontext_ia32.
And add casting to make this code same as arch/x86/kernel/signal_32.c.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index e591e381611b..1267977e7708 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -360,13 +360,13 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
 	err |= __put_user(regs->dx, &sc->dx);
 	err |= __put_user(regs->cx, &sc->cx);
 	err |= __put_user(regs->ax, &sc->ax);
-	err |= __put_user(regs->cs, &sc->cs);
-	err |= __put_user(regs->ss, &sc->ss);
 	err |= __put_user(current->thread.trap_no, &sc->trapno);
 	err |= __put_user(current->thread.error_code, &sc->err);
 	err |= __put_user(regs->ip, &sc->ip);
+	err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
 	err |= __put_user(regs->flags, &sc->flags);
 	err |= __put_user(regs->sp, &sc->sp_at_signal);
+	err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
 
 	err |= __put_user(ptr_to_compat(fpstate), &sc->fpstate);
 
-- 
cgit v1.2.3-70-g09d2


From f632ddcc0786149c0e4bef9b6b44c96a75c0d074 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 18 Nov 2008 17:32:26 +0100
Subject: x86: fix wakeup_cpu with numaq/es7000, v2, fix #2

Impact: fix boot crash

fix default_update_genapic().

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c366e891e10b..31328909456e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -585,8 +585,10 @@ early_param("elfcorehdr", setup_elfcorehdr);
 
 static int __init default_update_genapic(void)
 {
-#if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64)
-	genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
+#ifdef CONFIG_X86_SMP
+# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64)
+	genapic->wakeup_cpu = wakeup_secondary_cpu_via_init;
+# endif
 #endif
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From b5fe363b7d89577fcfda9b6cf0efc32760bbccc6 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 18 Nov 2008 08:14:14 -0800
Subject: x86: use update_genapic to get rid of ES7000_CLUSTERED_APIC v2

Impact: clean up

We can autodetect those system that need cluster apic, and update genapic
accordingly.

We can also remove wakeup.h for e7000, because it's default one is now
the same as overall default mach_wakecpu.h

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig                   |  4 --
 arch/x86/include/asm/es7000/apic.h | 76 ++++++++++++++++++++++++++------------
 arch/x86/include/asm/genapic_32.h  |  1 +
 arch/x86/kernel/es7000_32.c        | 17 +++++++--
 arch/x86/mach-generic/es7000.c     | 14 ++++++-
 5 files changed, 80 insertions(+), 32 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 93224b569187..7d0ab8942cfb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -462,10 +462,6 @@ config X86_CYCLONE_TIMER
 	def_bool y
 	depends on X86_GENERICARCH
 
-config ES7000_CLUSTERED_APIC
-	def_bool y
-	depends on SMP && X86_ES7000 && MPENTIUMIII
-
 source "arch/x86/Kconfig.cpu"
 
 config HPET_TIMER
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index 9d8cf776c285..e24ef876915f 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -9,28 +9,27 @@ static inline int apic_id_registered(void)
 	        return (1);
 }
 
-static inline cpumask_t target_cpus(void)
+static inline cpumask_t target_cpus_cluster(void)
 {
-#if defined CONFIG_ES7000_CLUSTERED_APIC
 	return CPU_MASK_ALL;
-#else
+}
+
+static inline cpumask_t target_cpus(void)
+{
 	return cpumask_of_cpu(smp_processor_id());
-#endif
 }
 
-#if defined CONFIG_ES7000_CLUSTERED_APIC
-#define APIC_DFR_VALUE		(APIC_DFR_CLUSTER)
-#define INT_DELIVERY_MODE	(dest_LowestPrio)
-#define INT_DEST_MODE		(1)    /* logical delivery broadcast to all procs */
-#define NO_BALANCE_IRQ		(1)
-#else
+#define APIC_DFR_VALUE_CLUSTER		(APIC_DFR_CLUSTER)
+#define INT_DELIVERY_MODE_CLUSTER	(dest_LowestPrio)
+#define INT_DEST_MODE_CLUSTER		(1) /* logical delivery broadcast to all procs */
+#define NO_BALANCE_IRQ_CLUSTER		(1)
+
 #define APIC_DFR_VALUE		(APIC_DFR_FLAT)
 #define INT_DELIVERY_MODE	(dest_Fixed)
 #define INT_DEST_MODE		(0)    /* phys delivery to target procs */
 #define NO_BALANCE_IRQ		(0)
 #undef  APIC_DEST_LOGICAL
 #define APIC_DEST_LOGICAL	0x0
-#endif
 
 static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
 {
@@ -57,6 +56,16 @@ static inline unsigned long calculate_ldr(int cpu)
  * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
  * document number 292116).  So here it goes...
  */
+static inline void init_apic_ldr_cluster(void)
+{
+	unsigned long val;
+	int cpu = smp_processor_id();
+
+	apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER);
+	val = calculate_ldr(cpu);
+	apic_write(APIC_LDR, val);
+}
+
 static inline void init_apic_ldr(void)
 {
 	unsigned long val;
@@ -67,10 +76,6 @@ static inline void init_apic_ldr(void)
 	apic_write(APIC_LDR, val);
 }
 
-#ifndef CONFIG_X86_GENERICARCH
-extern void enable_apic_mode(void);
-#endif
-
 extern int apic_version [MAX_APICS];
 static inline void setup_apic_routing(void)
 {
@@ -141,7 +146,7 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid)
 	return (1);
 }
 
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
 {
 	int num_bits_set;
 	int cpus_found = 0;
@@ -151,11 +156,7 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
 	num_bits_set = cpus_weight(cpumask);
 	/* Return id to all */
 	if (num_bits_set == NR_CPUS)
-#if defined CONFIG_ES7000_CLUSTERED_APIC
 		return 0xFF;
-#else
-		return cpu_to_logical_apicid(0);
-#endif
 	/*
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
@@ -168,11 +169,40 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)){
 				printk ("%s: Not a valid mask!\n", __func__);
-#if defined CONFIG_ES7000_CLUSTERED_APIC
 				return 0xFF;
-#else
+			}
+			apicid = new_apicid;
+			cpus_found++;
+		}
+		cpu++;
+	}
+	return apicid;
+}
+
+static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+{
+	int num_bits_set;
+	int cpus_found = 0;
+	int cpu;
+	int apicid;
+
+	num_bits_set = cpus_weight(cpumask);
+	/* Return id to all */
+	if (num_bits_set == NR_CPUS)
+		return cpu_to_logical_apicid(0);
+	/*
+	 * The cpus in the mask must all be on the apic cluster.  If are not
+	 * on the same apicid cluster return default value of TARGET_CPUS.
+	 */
+	cpu = first_cpu(cpumask);
+	apicid = cpu_to_logical_apicid(cpu);
+	while (cpus_found < num_bits_set) {
+		if (cpu_isset(cpu, cpumask)) {
+			int new_apicid = cpu_to_logical_apicid(cpu);
+			if (apicid_cluster(apicid) !=
+					apicid_cluster(new_apicid)){
+				printk ("%s: Not a valid mask!\n", __func__);
 				return cpu_to_logical_apicid(0);
-#endif
 			}
 			apicid = new_apicid;
 			cpus_found++;
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 455d6c27a98b..0ac17d33a8c7 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -131,6 +131,7 @@ struct genapic {
 }
 
 extern struct genapic *genapic;
+extern void es7000_update_genapic_to_cluster(void);
 
 enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
 #define get_uv_system_type()		UV_NONE
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
index fb3bfe66fbe2..71d7be624d46 100644
--- a/arch/x86/kernel/es7000_32.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -38,6 +38,7 @@
 #include <asm/io.h>
 #include <asm/nmi.h>
 #include <asm/smp.h>
+#include <asm/atomic.h>
 #include <asm/apicdef.h>
 #include <mach_mpparse.h>
 #include <asm/genapic.h>
@@ -163,7 +164,6 @@ es7000_rename_gsi(int ioapic, int gsi)
 	return gsi;
 }
 
-#ifdef CONFIG_ES7000_CLUSTERED_APIC
 static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
 {
 	unsigned long vect = 0, psaival = 0;
@@ -182,13 +182,24 @@ static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
 	return 0;
 }
 
+static void noop_wait_for_deassert(atomic_t *deassert_not_used)
+{
+}
+
 static int __init es7000_update_genapic(void)
 {
 	genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
 
+	/* MPENTIUMIII */
+	if (boot_cpu_data.x86 == 6 &&
+	    (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) {
+		es7000_update_genapic_to_cluster();
+		genapic->wait_for_init_deassert = noop_wait_for_deassert;
+		genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
+	}
+
 	return 0;
 }
-#endif
 
 void __init
 setup_unisys(void)
@@ -206,9 +217,7 @@ setup_unisys(void)
 		es7000_plat = ES7000_CLASSIC;
 	ioapic_renumber_irq = es7000_rename_gsi;
 
-#ifdef CONFIG_ES7000_CLUSTERED_APIC
 	x86_quirks->update_genapic = es7000_update_genapic;
-#endif
 }
 
 /*
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 28459cab3ddb..7b4e6d0d1690 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -16,7 +16,19 @@
 #include <asm/es7000/apic.h>
 #include <asm/es7000/ipi.h>
 #include <asm/es7000/mpparse.h>
-#include <asm/es7000/wakecpu.h>
+#include <asm/mach-default/mach_wakecpu.h>
+
+void __init es7000_update_genapic_to_cluster(void)
+{
+	genapic->target_cpus = target_cpus_cluster;
+	genapic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER;
+	genapic->int_dest_mode = INT_DEST_MODE_CLUSTER;
+	genapic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER;
+
+	genapic->init_apic_ldr = init_apic_ldr_cluster;
+
+	genapic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster;
+}
 
 static int probe_es7000(void)
 {
-- 
cgit v1.2.3-70-g09d2


From 77be80e437fec44f8b7a620314b7d7b605b8d93b Mon Sep 17 00:00:00 2001
From: "Richard A. Holden III" <aciddeath@gmail.com>
Date: Wed, 19 Nov 2008 16:05:14 -0700
Subject: x86: fix arch/x86/kernel/genx2apic_uv_x.c build warning when
 !CONFIG_HOTPLUG_CPU

Impact: cleanup, reduce size of the kernel image a bit

Fix:

  arch/x86/kernel/genx2apic_uv_x.c:403: warning: 'uv_heartbeat_disable' defined but not used

the function is only used when CONFIG_HOTPLUG_CPU is defined.

Signed-off-by: Richard A. Holden III <aciddeath@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/genx2apic_uv_x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index f02bbe5d0178..221299f4509f 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -400,6 +400,7 @@ static void __cpuinit uv_heartbeat_enable(int cpu)
 		uv_heartbeat_enable(0);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 static void __cpuinit uv_heartbeat_disable(int cpu)
 {
 	if (uv_cpu_hub_info(cpu)->scir.enabled) {
@@ -409,7 +410,6 @@ static void __cpuinit uv_heartbeat_disable(int cpu)
 	uv_set_cpu_scir_bits(cpu, 0xff);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 /*
  * cpu hotplug notifier
  */
-- 
cgit v1.2.3-70-g09d2


From bb5574608a8375026510b4f983ffbb06ece33fe2 Mon Sep 17 00:00:00 2001
From: "Richard A. Holden III" <aciddeath@gmail.com>
Date: Wed, 19 Nov 2008 16:05:15 -0700
Subject: x86: fix arch/x86/kernel/setup.c build warning when
 !CONFIG_X86_RESERVE_LOW_64K

Impact: cleanup

Fix:

  arch/x86/kernel/setup.c:592: warning: 'dmi_low_memory_corruption' defined but not used

this is only used if CONFIG_X86_RESERVE_LOW_64K is defined.

Signed-off-by: Richard A. Holden III <aciddeath@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index e6c51433247d..13a5f592ac28 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -587,6 +587,7 @@ static struct x86_quirks default_x86_quirks __initdata;
 
 struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
 
+#ifdef CONFIG_X86_RESERVE_LOW_64K
 static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
 {
 	printk(KERN_NOTICE
@@ -598,6 +599,7 @@ static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
 
 	return 0;
 }
+#endif
 
 /* List of systems that have known low memory corruption BIOS problems */
 static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
-- 
cgit v1.2.3-70-g09d2


From 87f7606591aea6a8a38ea4c8911b5eeeee2740b8 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 19 Nov 2008 20:50:53 -0800
Subject: x86: fix wakeup_cpu with numaq/es7000 v2 - call ->update_genapic()

Impact: fix boot crash on 32-bit

Hiroshi Shimamoto reported a boot failure on 32-bit x86.

The setting of x86_quirks.wakeup_cpu is missing (when
not passing in an explicit apic= boot parameter).

Reported-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mach-generic/probe.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c
index 90b134f3cd74..c346d9d0226f 100644
--- a/arch/x86/mach-generic/probe.c
+++ b/arch/x86/mach-generic/probe.c
@@ -76,12 +76,15 @@ void __init generic_bigsmp_probe(void)
 	 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
 	 */
 
-	if (!cmdline_apic && genapic == &apic_default)
+	if (!cmdline_apic && genapic == &apic_default) {
 		if (apic_bigsmp.probe()) {
 			genapic = &apic_bigsmp;
+			if (x86_quirks->update_genapic)
+				x86_quirks->update_genapic();
 			printk(KERN_INFO "Overriding APIC driver with %s\n",
 			       genapic->name);
 		}
+	}
 #endif
 }
 
@@ -98,6 +101,9 @@ void __init generic_apic_probe(void)
 		/* Not visible without early console */
 		if (!apic_probe[i])
 			panic("Didn't find an APIC driver");
+
+		if (x86_quirks->update_genapic)
+			x86_quirks->update_genapic();
 	}
 	printk(KERN_INFO "Using APIC driver %s\n", genapic->name);
 }
@@ -112,6 +118,8 @@ int __init mps_oem_check(struct mp_config_table *mpc, char *oem,
 		if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) {
 			if (!cmdline_apic) {
 				genapic = apic_probe[i];
+				if (x86_quirks->update_genapic)
+					x86_quirks->update_genapic();
 				printk(KERN_INFO "Switched to APIC driver `%s'.\n",
 				       genapic->name);
 			}
@@ -128,6 +136,8 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 		if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
 			if (!cmdline_apic) {
 				genapic = apic_probe[i];
+				if (x86_quirks->update_genapic)
+					x86_quirks->update_genapic();
 				printk(KERN_INFO "Switched to APIC driver `%s'.\n",
 				       genapic->name);
 			}
-- 
cgit v1.2.3-70-g09d2


From d99015b1abbad743aa049b439c1e1dede6d0fa49 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Wed, 19 Nov 2008 01:18:11 +0100
Subject: x86: move entry_64.S register saving out of the macros

Here is a combined patch that moves "save_args" out-of-line for
the interrupt macro and moves "error_entry" mostly out-of-line
for the zeroentry and errorentry macros.

The save_args function becomes really straightforward and easy
to understand, with the possible exception of the stack switch
code, which now needs to copy the return address of to the
calling function. Normal interrupts arrive with ((~vector)-0x80)
on the stack, which gets adjusted in common_interrupt:

<common_interrupt>:
(5)  addq   $0xffffffffffffff80,(%rsp)		/* -> ~(vector) */
(4)  sub    $0x50,%rsp				/* space for registers */
(5)  callq  ffffffff80211290 <save_args>
(5)  callq  ffffffff80214290 <do_IRQ>
<ret_from_intr>:
     ...

An apic interrupt stub now look like this:

<thermal_interrupt>:
(5)  pushq  $0xffffffffffffff05			/* ~(vector) */
(4)  sub    $0x50,%rsp				/* space for registers */
(5)  callq  ffffffff80211290 <save_args>
(5)  callq  ffffffff80212b8f <smp_thermal_interrupt>
(5)  jmpq   ffffffff80211f93 <ret_from_intr>

Similarly the exception handler register saving function becomes
simpler, without the need of any parameter shuffling. The stub
for an exception without errorcode looks like this:

<overflow>:
(6)  callq  *0x1cad12(%rip)        # ffffffff803dd448 <pv_irq_ops+0x38>
(2)  pushq  $0xffffffffffffffff			/* no syscall */
(4)  sub    $0x78,%rsp				/* space for registers */
(5)  callq  ffffffff8030e3b0 <error_entry>
(3)  mov    %rsp,%rdi				/* pt_regs pointer */
(2)  xor    %esi,%esi				/* no error code */
(5)  callq  ffffffff80213446 <do_overflow>
(5)  jmpq   ffffffff8030e460 <error_exit>

And one for an exception with errorcode like this:

<segment_not_present>:
(6)  callq  *0x1cab92(%rip)        # ffffffff803dd448 <pv_irq_ops+0x38>
(4)  sub    $0x78,%rsp				/* space for registers */
(5)  callq  ffffffff8030e3b0 <error_entry>
(3)  mov    %rsp,%rdi				/* pt_regs pointer */
(5)  mov    0x78(%rsp),%rsi			/* load error code */
(9)  movq   $0xffffffffffffffff,0x78(%rsp)	/* no syscall */
(5)  callq  ffffffff80213209 <do_segment_not_present>
(5)  jmpq   ffffffff8030e460 <error_exit>

Unfortunately, this last type is more than 32 bytes. But the total space
savings due to this patch is about 2500 bytes on an smp-configuration,
and I think the code is clearer than it was before. The tested kernels
were non-paravirt ones (i.e., without the indirect call at the top of
the exception handlers).

Anyhow, I tested this patch on top of a recent -tip. The machine
was an 2x4-core Xeon at 2333MHz. Measured where the delays between
(almost-)adjacent rdtsc instructions. The graphs show how much
time is spent outside of the program as a function of the measured
delay. The area under the graph represents the total time spent
outside the program. Eight instances of the rdtsctest were
started, each pinned to a single cpu. The histogams are added.
For each kernel two measurements were done: one in mostly idle
condition, the other while running "bonnie++ -f", bound to cpu 0.
Each measurement took 40 minutes runtime. See the attached graphs
for the results. The graphs overlap almost everywhere, but there
are small differences.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 300 +++++++++++++++++++++++++--------------------
 1 file changed, 166 insertions(+), 134 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index dbf06a0ef3d5..5a12432ccdf9 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -242,6 +242,78 @@ ENTRY(native_usergs_sysret64)
 	CFI_REL_OFFSET	rsp,RSP
 	/*CFI_REL_OFFSET	ss,SS*/
 	.endm
+
+/*
+ * initial frame state for interrupts and exceptions
+ */
+	.macro _frame ref
+	CFI_STARTPROC simple
+	CFI_SIGNAL_FRAME
+	CFI_DEF_CFA rsp,SS+8-\ref
+	/*CFI_REL_OFFSET ss,SS-\ref*/
+	CFI_REL_OFFSET rsp,RSP-\ref
+	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
+	/*CFI_REL_OFFSET cs,CS-\ref*/
+	CFI_REL_OFFSET rip,RIP-\ref
+	.endm
+
+/*
+ * initial frame state for interrupts (and exceptions without error code)
+ */
+#define INTR_FRAME _frame RIP
+/*
+ * initial frame state for exceptions with error code (and interrupts
+ * with vector already pushed)
+ */
+#define XCPT_FRAME _frame ORIG_RAX
+
+/* save partial stack frame */
+ENTRY(save_args)
+	XCPT_FRAME
+	cld
+	movq  %rdi, 8*8+16(%rsp)
+	CFI_REL_OFFSET rdi, 8*8+16
+	movq  %rsi, 7*8+16(%rsp)
+	CFI_REL_OFFSET rsi, 7*8+16
+	movq  %rdx, 6*8+16(%rsp)
+	CFI_REL_OFFSET rdx, 6*8+16
+	movq  %rcx, 5*8+16(%rsp)
+	CFI_REL_OFFSET rcx, 5*8+16
+	movq  %rax, 4*8+16(%rsp)
+	CFI_REL_OFFSET rax, 4*8+16
+	movq  %r8, 3*8+16(%rsp)
+	CFI_REL_OFFSET r8, 3*8+16
+	movq  %r9, 2*8+16(%rsp)
+	CFI_REL_OFFSET r9, 2*8+16
+	movq  %r10, 1*8+16(%rsp)
+	CFI_REL_OFFSET r10, 1*8+16
+	movq  %r11, 0*8+16(%rsp)
+	CFI_REL_OFFSET r11, 0*8+16
+	leaq -ARGOFFSET+16(%rsp),%rdi	/* arg1 for handler */
+	movq %rbp, 8(%rsp)		/* push %rbp */
+	leaq 8(%rsp), %rbp		/* mov %rsp, %ebp */
+	testl $3, CS(%rdi)
+	je 1f
+	SWAPGS
+	/*
+	 * irqcount is used to check if a CPU is already on an interrupt stack
+	 * or not. While this is essentially redundant with preempt_count it is
+	 * a little cheaper to use a separate counter in the PDA (short of
+	 * moving irq_enter into assembly, which would be too much work)
+	 */
+1:	incl %gs:pda_irqcount
+	jne 2f
+	pop %rax			/* move return address... */
+	mov %gs:pda_irqstackptr,%rsp
+	push %rax			/* ... to the new stack */
+	/*
+	 * We entered an interrupt context - irqs are off:
+	 */
+2:	TRACE_IRQS_OFF
+	ret
+	CFI_ENDPROC
+END(save_args)
+
 /*
  * A newly forked process directly context switches into this.
  */
@@ -607,26 +679,6 @@ ENTRY(stub_rt_sigreturn)
 	CFI_ENDPROC
 END(stub_rt_sigreturn)
 
-/*
- * initial frame state for interrupts and exceptions
- */
-	.macro _frame ref
-	CFI_STARTPROC simple
-	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA rsp,SS+8-\ref
-	/*CFI_REL_OFFSET ss,SS-\ref*/
-	CFI_REL_OFFSET rsp,RSP-\ref
-	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
-	/*CFI_REL_OFFSET cs,CS-\ref*/
-	CFI_REL_OFFSET rip,RIP-\ref
-	.endm
-
-/* initial frame state for interrupts (and exceptions without error code) */
-#define INTR_FRAME _frame RIP
-/* initial frame state for exceptions with error code (and interrupts with
-   vector already pushed) */
-#define XCPT_FRAME _frame ORIG_RAX
-
 /*
  * Build the entry stubs and pointer table with some assembler magic.
  * We pack 7 stubs into a single 32-byte chunk, which will fit in a
@@ -667,46 +719,19 @@ END(irq_entries_start)
 END(interrupt)
 .previous
 
-/* 
+/*
  * Interrupt entry/exit.
  *
  * Interrupt entry points save only callee clobbered registers in fast path.
- *	
- * Entry runs with interrupts off.	
- */ 
+ *
+ * Entry runs with interrupts off.
+ */
 
 /* 0(%rsp): ~(interrupt number) */
 	.macro interrupt func
-	cld
-	SAVE_ARGS
-	leaq -ARGOFFSET(%rsp),%rdi	/* arg1 for handler */
-	pushq %rbp
-	/*
-	 * Save rbp twice: One is for marking the stack frame, as usual, and the
-	 * other, to fill pt_regs properly. This is because bx comes right
-	 * before the last saved register in that structure, and not bp. If the
-	 * base pointer were in the place bx is today, this would not be needed.
-	 */
-	movq %rbp, -8(%rsp)
-	CFI_ADJUST_CFA_OFFSET	8
-	CFI_REL_OFFSET		rbp, 0
-	movq %rsp,%rbp
-	CFI_DEF_CFA_REGISTER	rbp
-	testl $3,CS(%rdi)
-	je 1f
-	SWAPGS
-	/* irqcount is used to check if a CPU is already on an interrupt
-	   stack or not. While this is essentially redundant with preempt_count
-	   it is a little cheaper to use a separate counter in the PDA
-	   (short of moving irq_enter into assembly, which would be too
-	    much work) */
-1:	incl	%gs:pda_irqcount
-	cmoveq %gs:pda_irqstackptr,%rsp
-	push    %rbp			# backlink for old unwinder
-	/*
-	 * We entered an interrupt context - irqs are off:
-	 */
-	TRACE_IRQS_OFF
+	subq $10*8, %rsp
+	CFI_ADJUST_CFA_OFFSET 10*8
+	call save_args
 	call \func
 	.endm
 
@@ -852,6 +877,8 @@ END(common_interrupt)
 /*
  * APIC interrupts.
  */
+	.p2align 5
+
 	.macro apicinterrupt num,func
 	INTR_FRAME
 	pushq $~(\num)
@@ -922,24 +949,29 @@ END(spurious_interrupt)
 	.macro zeroentry sym
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	pushq $0	/* push error code/oldrax */
+	pushq $-1		/* ORIG_RAX: no syscall to restart */
 	CFI_ADJUST_CFA_OFFSET 8
-	pushq %rax	/* push real oldrax to the rdi slot */
-	CFI_ADJUST_CFA_OFFSET 8
-	CFI_REL_OFFSET rax,0
-	leaq  \sym(%rip),%rax
-	jmp error_entry
+	subq $15*8,%rsp
+	CFI_ADJUST_CFA_OFFSET 15*8
+	call error_entry
+	movq %rsp,%rdi		/* pt_regs pointer */
+	xorl %esi,%esi		/* no error code */
+	call \sym
+	jmp error_exit		/* %ebx: no swapgs flag */
 	CFI_ENDPROC
 	.endm
 
 	.macro errorentry sym
 	XCPT_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	pushq %rax
-	CFI_ADJUST_CFA_OFFSET 8
-	CFI_REL_OFFSET rax,0
-	leaq  \sym(%rip),%rax
-	jmp error_entry
+	subq $15*8,%rsp
+	CFI_ADJUST_CFA_OFFSET 15*8
+	call error_entry
+	movq %rsp,%rdi			/* pt_regs pointer */
+	movq ORIG_RAX(%rsp),%rsi	/* get error code */
+	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
+	call \sym
+	jmp error_exit			/* %ebx: no swapgs flag */
 	CFI_ENDPROC
 	.endm
 
@@ -1043,93 +1075,93 @@ paranoid_schedule\trace:
 	.endm
 
 /*
- * Exception entry point. This expects an error code/orig_rax on the stack
- * and the exception handler in %rax.
+ * Exception entry point. This expects an error code/orig_rax on the stack.
+ * returns in "no swapgs flag" in %ebx.
  */
 KPROBE_ENTRY(error_entry)
 	_frame RDI
-	CFI_REL_OFFSET rax,0
-	/* rdi slot contains rax, oldrax contains error code */
+	CFI_ADJUST_CFA_OFFSET 15*8
+	/* oldrax contains error code */
 	cld
-	subq  $14*8,%rsp
-	CFI_ADJUST_CFA_OFFSET	(14*8)
-	movq %rsi,13*8(%rsp)
-	CFI_REL_OFFSET	rsi,RSI
-	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
-	CFI_REGISTER	rax,rsi
-	movq %rdx,12*8(%rsp)
-	CFI_REL_OFFSET	rdx,RDX
-	movq %rcx,11*8(%rsp)
-	CFI_REL_OFFSET	rcx,RCX
-	movq %rsi,10*8(%rsp)	/* store rax */
-	CFI_REL_OFFSET	rax,RAX
-	movq %r8, 9*8(%rsp)
-	CFI_REL_OFFSET	r8,R8
-	movq %r9, 8*8(%rsp)
-	CFI_REL_OFFSET	r9,R9
-	movq %r10,7*8(%rsp)
-	CFI_REL_OFFSET	r10,R10
-	movq %r11,6*8(%rsp)
-	CFI_REL_OFFSET	r11,R11
-	movq %rbx,5*8(%rsp)
-	CFI_REL_OFFSET	rbx,RBX
-	movq %rbp,4*8(%rsp)
-	CFI_REL_OFFSET	rbp,RBP
-	movq %r12,3*8(%rsp)
-	CFI_REL_OFFSET	r12,R12
-	movq %r13,2*8(%rsp)
-	CFI_REL_OFFSET	r13,R13
-	movq %r14,1*8(%rsp)
-	CFI_REL_OFFSET	r14,R14
-	movq %r15,(%rsp)
-	CFI_REL_OFFSET	r15,R15
+	movq %rdi,14*8+8(%rsp)
+	CFI_REL_OFFSET rdi,RDI+8
+	movq %rsi,13*8+8(%rsp)
+	CFI_REL_OFFSET rsi,RSI+8
+	movq %rdx,12*8+8(%rsp)
+	CFI_REL_OFFSET rdx,RDX+8
+	movq %rcx,11*8+8(%rsp)
+	CFI_REL_OFFSET rcx,RCX+8
+	movq %rax,10*8+8(%rsp)
+	CFI_REL_OFFSET rax,RAX+8
+	movq %r8, 9*8+8(%rsp)
+	CFI_REL_OFFSET r8,R8+8
+	movq %r9, 8*8+8(%rsp)
+	CFI_REL_OFFSET r9,R9+8
+	movq %r10,7*8+8(%rsp)
+	CFI_REL_OFFSET r10,R10+8
+	movq %r11,6*8+8(%rsp)
+	CFI_REL_OFFSET r11,R11+8
+	movq %rbx,5*8+8(%rsp)
+	CFI_REL_OFFSET rbx,RBX+8
+	movq %rbp,4*8+8(%rsp)
+	CFI_REL_OFFSET rbp,RBP+8
+	movq %r12,3*8+8(%rsp)
+	CFI_REL_OFFSET r12,R12+8
+	movq %r13,2*8+8(%rsp)
+	CFI_REL_OFFSET r13,R13+8
+	movq %r14,1*8+8(%rsp)
+	CFI_REL_OFFSET r14,R14+8
+	movq %r15,0*8+8(%rsp)
+	CFI_REL_OFFSET r15,R15+8
 	xorl %ebx,%ebx
-	testl $3,CS(%rsp)
-	je  error_kernelspace
+	testl $3,CS+8(%rsp)
+	je error_kernelspace
 error_swapgs:
 	SWAPGS
 error_sti:
 	TRACE_IRQS_OFF
-	movq %rdi,RDI(%rsp)
-	CFI_REL_OFFSET	rdi,RDI
-	movq %rsp,%rdi
-	movq ORIG_RAX(%rsp),%rsi	/* get error code */
-	movq $-1,ORIG_RAX(%rsp)
-	call *%rax
-	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
-error_exit:
+	ret
+	CFI_ENDPROC
+
+/*
+ * There are two places in the kernel that can potentially fault with
+ * usergs. Handle them here. The exception handlers after iret run with
+ * kernel gs again, so don't set the user space flag. B stepping K8s
+ * sometimes report an truncated RIP for IRET exceptions returning to
+ * compat mode. Check for these here too.
+ */
+error_kernelspace:
+	incl %ebx
+	leaq irq_return(%rip),%rcx
+	cmpq %rcx,RIP+8(%rsp)
+	je error_swapgs
+	movl %ecx,%ecx	/* zero extend */
+	cmpq %rcx,RIP+8(%rsp)
+	je error_swapgs
+	cmpq $gs_change,RIP+8(%rsp)
+        je error_swapgs
+	jmp error_sti
+KPROBE_END(error_entry)
+
+
+/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
+KPROBE_ENTRY(error_exit)
+	_frame R15
 	movl %ebx,%eax
 	RESTORE_REST
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)
 	testl %eax,%eax
-	jne  retint_kernel
+	jne retint_kernel
 	LOCKDEP_SYS_EXIT_IRQ
-	movl  TI_flags(%rcx),%edx
-	movl  $_TIF_WORK_MASK,%edi
-	andl  %edi,%edx
-	jnz  retint_careful
+	movl TI_flags(%rcx),%edx
+	movl $_TIF_WORK_MASK,%edi
+	andl %edi,%edx
+	jnz retint_careful
 	jmp retint_swapgs
 	CFI_ENDPROC
-
-error_kernelspace:
-	incl %ebx
-       /* There are two places in the kernel that can potentially fault with
-          usergs. Handle them here. The exception handlers after
-	   iret run with kernel gs again, so don't set the user space flag.
-	   B stepping K8s sometimes report an truncated RIP for IRET
-	   exceptions returning to compat mode. Check for these here too. */
-	leaq irq_return(%rip),%rcx
-	cmpq %rcx,RIP(%rsp)
-	je   error_swapgs
-	movl %ecx,%ecx	/* zero extend */
-	cmpq %rcx,RIP(%rsp)
-	je   error_swapgs
-	cmpq $gs_change,RIP(%rsp)
-        je   error_swapgs
-	jmp  error_sti
-KPROBE_END(error_entry)
+KPROBE_END(error_exit)
 
        /* Reload gs selector with exception handling */
        /* edi:  new selector */
-- 
cgit v1.2.3-70-g09d2


From dcd072e26055de600cecdc3f7a1e083ecd55c2e4 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Thu, 20 Nov 2008 14:40:11 +0100
Subject: x86: clean up after: move entry_64.S register saving out of the
 macros

This add-on patch to x86: move entry_64.S register saving out
of the macros visually cleans up the appearance of the code by
introducing some basic helper macro's. It also adds some cfi
annotations which were missing.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 220 +++++++++++++++++++++++----------------------
 1 file changed, 112 insertions(+), 108 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 5a12432ccdf9..7a04f696121d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -60,6 +60,23 @@
 #define __AUDIT_ARCH_LE	   0x40000000
 
 	.code64
+/*
+ * Some macro's to hide the most frequently occuring CFI annotations.
+ */
+	.macro CFI_PUSHQ reg
+	pushq \reg
+	CFI_ADJUST_CFA_OFFSET 8
+	.endm
+
+	.macro CFI_POPQ reg
+	popq \reg
+	CFI_ADJUST_CFA_OFFSET -8
+	.endm
+
+	.macro CFI_MOVQ reg offset=0
+	movq %\reg, \offset(%rsp)
+	CFI_REL_OFFSET \reg, \offset
+	.endm
 
 #ifdef CONFIG_FUNCTION_TRACER
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -213,84 +230,84 @@ ENTRY(native_usergs_sysret64)
 	CFI_ADJUST_CFA_OFFSET	-(6*8)
 	.endm
 
-	.macro	CFI_DEFAULT_STACK start=1
+/*
+ * initial frame state for interrupts (and exceptions without error code)
+ */
+	.macro EMPTY_FRAME start=1 offset=0
 	.if \start
-	CFI_STARTPROC	simple
+	CFI_STARTPROC simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,SS+8
+	CFI_DEF_CFA rsp,8+\offset
 	.else
-	CFI_DEF_CFA_OFFSET SS+8
+	CFI_DEF_CFA_OFFSET 8+\offset
 	.endif
-	CFI_REL_OFFSET	r15,R15
-	CFI_REL_OFFSET	r14,R14
-	CFI_REL_OFFSET	r13,R13
-	CFI_REL_OFFSET	r12,R12
-	CFI_REL_OFFSET	rbp,RBP
-	CFI_REL_OFFSET	rbx,RBX
-	CFI_REL_OFFSET	r11,R11
-	CFI_REL_OFFSET	r10,R10
-	CFI_REL_OFFSET	r9,R9
-	CFI_REL_OFFSET	r8,R8
-	CFI_REL_OFFSET	rax,RAX
-	CFI_REL_OFFSET	rcx,RCX
-	CFI_REL_OFFSET	rdx,RDX
-	CFI_REL_OFFSET	rsi,RSI
-	CFI_REL_OFFSET	rdi,RDI
-	CFI_REL_OFFSET	rip,RIP
-	/*CFI_REL_OFFSET	cs,CS*/
-	/*CFI_REL_OFFSET	rflags,EFLAGS*/
-	CFI_REL_OFFSET	rsp,RSP
-	/*CFI_REL_OFFSET	ss,SS*/
 	.endm
 
 /*
- * initial frame state for interrupts and exceptions
+ * initial frame state for interrupts (and exceptions without error code)
  */
-	.macro _frame ref
-	CFI_STARTPROC simple
-	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA rsp,SS+8-\ref
-	/*CFI_REL_OFFSET ss,SS-\ref*/
-	CFI_REL_OFFSET rsp,RSP-\ref
-	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
-	/*CFI_REL_OFFSET cs,CS-\ref*/
-	CFI_REL_OFFSET rip,RIP-\ref
+	.macro INTR_FRAME start=1 offset=0
+	EMPTY_FRAME \start, (SS+8-RIP)+\offset
+	/*CFI_REL_OFFSET ss, SS-RIP+\offset*/
+	CFI_REL_OFFSET rsp, RSP-RIP+\offset
+	/*CFI_REL_OFFSET rflags, EFLAGS-RIP+\offset*/
+	/*CFI_REL_OFFSET cs, CS-RIP+\offset*/
+	CFI_REL_OFFSET rip, RIP-RIP+\offset
 	.endm
 
-/*
- * initial frame state for interrupts (and exceptions without error code)
- */
-#define INTR_FRAME _frame RIP
 /*
  * initial frame state for exceptions with error code (and interrupts
  * with vector already pushed)
  */
-#define XCPT_FRAME _frame ORIG_RAX
+	.macro XCPT_FRAME start=1 offset=0
+	INTR_FRAME \start, (RIP-ORIG_RAX)+\offset
+	/*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
+	.endm
+
+/*
+ * frame that enables calling into C.
+ */
+	.macro PARTIAL_FRAME start=1 offset=0
+	XCPT_FRAME \start, (ORIG_RAX-ARGOFFSET)+\offset
+	CFI_REL_OFFSET rdi, (RDI-ARGOFFSET)+\offset
+	CFI_REL_OFFSET rsi, (RSI-ARGOFFSET)+\offset
+	CFI_REL_OFFSET rdx, (RDX-ARGOFFSET)+\offset
+	CFI_REL_OFFSET rcx, (RCX-ARGOFFSET)+\offset
+	CFI_REL_OFFSET rax, (RAX-ARGOFFSET)+\offset
+	CFI_REL_OFFSET r8, (R8-ARGOFFSET)+\offset
+	CFI_REL_OFFSET r9, (R9-ARGOFFSET)+\offset
+	CFI_REL_OFFSET r10, (R10-ARGOFFSET)+\offset
+	CFI_REL_OFFSET r11, (R11-ARGOFFSET)+\offset
+	.endm
+
+/*
+ * frame that enables passing a complete pt_regs to a C function.
+ */
+	.macro DEFAULT_FRAME start=1 offset=0
+	PARTIAL_FRAME \start, (R11-R15)+\offset
+	CFI_REL_OFFSET rbx, RBX+\offset
+	CFI_REL_OFFSET rbp, RBP+\offset
+	CFI_REL_OFFSET r12, R12+\offset
+	CFI_REL_OFFSET r13, R13+\offset
+	CFI_REL_OFFSET r14, R14+\offset
+	CFI_REL_OFFSET r15, R15+\offset
+	.endm
 
 /* save partial stack frame */
 ENTRY(save_args)
 	XCPT_FRAME
 	cld
-	movq  %rdi, 8*8+16(%rsp)
-	CFI_REL_OFFSET rdi, 8*8+16
-	movq  %rsi, 7*8+16(%rsp)
-	CFI_REL_OFFSET rsi, 7*8+16
-	movq  %rdx, 6*8+16(%rsp)
-	CFI_REL_OFFSET rdx, 6*8+16
-	movq  %rcx, 5*8+16(%rsp)
-	CFI_REL_OFFSET rcx, 5*8+16
-	movq  %rax, 4*8+16(%rsp)
-	CFI_REL_OFFSET rax, 4*8+16
-	movq  %r8, 3*8+16(%rsp)
-	CFI_REL_OFFSET r8, 3*8+16
-	movq  %r9, 2*8+16(%rsp)
-	CFI_REL_OFFSET r9, 2*8+16
-	movq  %r10, 1*8+16(%rsp)
-	CFI_REL_OFFSET r10, 1*8+16
-	movq  %r11, 0*8+16(%rsp)
-	CFI_REL_OFFSET r11, 0*8+16
+	CFI_MOVQ rdi, (RDI-ARGOFFSET)+16
+	CFI_MOVQ rsi, (RSI-ARGOFFSET)+16
+	CFI_MOVQ rdx, (RDX-ARGOFFSET)+16
+	CFI_MOVQ rcx, (RCX-ARGOFFSET)+16
+	CFI_MOVQ rax, (RAX-ARGOFFSET)+16
+	CFI_MOVQ r8, (R8-ARGOFFSET)+16
+	CFI_MOVQ r9, (R9-ARGOFFSET)+16
+	CFI_MOVQ r10, (R10-ARGOFFSET)+16
+	CFI_MOVQ r11, (R11-ARGOFFSET)+16
 	leaq -ARGOFFSET+16(%rsp),%rdi	/* arg1 for handler */
-	movq %rbp, 8(%rsp)		/* push %rbp */
+	CFI_MOVQ rbp, 8		/* push %rbp */
 	leaq 8(%rsp), %rbp		/* mov %rsp, %ebp */
 	testl $3, CS(%rdi)
 	je 1f
@@ -303,9 +320,10 @@ ENTRY(save_args)
 	 */
 1:	incl %gs:pda_irqcount
 	jne 2f
-	pop %rax			/* move return address... */
+	CFI_POPQ %rax			/* move return address... */
 	mov %gs:pda_irqstackptr,%rsp
-	push %rax			/* ... to the new stack */
+	EMPTY_FRAME 0
+	CFI_PUSHQ %rax			/* ... to the new stack */
 	/*
 	 * We entered an interrupt context - irqs are off:
 	 */
@@ -319,7 +337,7 @@ END(save_args)
  */
 /* rdi:	prev */
 ENTRY(ret_from_fork)
-	CFI_DEFAULT_STACK
+	DEFAULT_FRAME
 	push kernel_eflags(%rip)
 	CFI_ADJUST_CFA_OFFSET 8
 	popf				# reset kernel eflags
@@ -732,6 +750,7 @@ END(interrupt)
 	subq $10*8, %rsp
 	CFI_ADJUST_CFA_OFFSET 10*8
 	call save_args
+	PARTIAL_FRAME 0
 	call \func
 	.endm
 
@@ -949,11 +968,11 @@ END(spurious_interrupt)
 	.macro zeroentry sym
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	pushq $-1		/* ORIG_RAX: no syscall to restart */
-	CFI_ADJUST_CFA_OFFSET 8
+	CFI_PUSHQ $-1		/* ORIG_RAX: no syscall to restart */
 	subq $15*8,%rsp
 	CFI_ADJUST_CFA_OFFSET 15*8
 	call error_entry
+	DEFAULT_FRAME 0
 	movq %rsp,%rdi		/* pt_regs pointer */
 	xorl %esi,%esi		/* no error code */
 	call \sym
@@ -967,6 +986,7 @@ END(spurious_interrupt)
 	subq $15*8,%rsp
 	CFI_ADJUST_CFA_OFFSET 15*8
 	call error_entry
+	DEFAULT_FRAME 0
 	movq %rsp,%rdi			/* pt_regs pointer */
 	movq ORIG_RAX(%rsp),%rsi	/* get error code */
 	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
@@ -1079,40 +1099,25 @@ paranoid_schedule\trace:
  * returns in "no swapgs flag" in %ebx.
  */
 KPROBE_ENTRY(error_entry)
-	_frame RDI
+	XCPT_FRAME
 	CFI_ADJUST_CFA_OFFSET 15*8
 	/* oldrax contains error code */
 	cld
-	movq %rdi,14*8+8(%rsp)
-	CFI_REL_OFFSET rdi,RDI+8
-	movq %rsi,13*8+8(%rsp)
-	CFI_REL_OFFSET rsi,RSI+8
-	movq %rdx,12*8+8(%rsp)
-	CFI_REL_OFFSET rdx,RDX+8
-	movq %rcx,11*8+8(%rsp)
-	CFI_REL_OFFSET rcx,RCX+8
-	movq %rax,10*8+8(%rsp)
-	CFI_REL_OFFSET rax,RAX+8
-	movq %r8, 9*8+8(%rsp)
-	CFI_REL_OFFSET r8,R8+8
-	movq %r9, 8*8+8(%rsp)
-	CFI_REL_OFFSET r9,R9+8
-	movq %r10,7*8+8(%rsp)
-	CFI_REL_OFFSET r10,R10+8
-	movq %r11,6*8+8(%rsp)
-	CFI_REL_OFFSET r11,R11+8
-	movq %rbx,5*8+8(%rsp)
-	CFI_REL_OFFSET rbx,RBX+8
-	movq %rbp,4*8+8(%rsp)
-	CFI_REL_OFFSET rbp,RBP+8
-	movq %r12,3*8+8(%rsp)
-	CFI_REL_OFFSET r12,R12+8
-	movq %r13,2*8+8(%rsp)
-	CFI_REL_OFFSET r13,R13+8
-	movq %r14,1*8+8(%rsp)
-	CFI_REL_OFFSET r14,R14+8
-	movq %r15,0*8+8(%rsp)
-	CFI_REL_OFFSET r15,R15+8
+	CFI_MOVQ rdi, RDI+8
+	CFI_MOVQ rsi, RSI+8
+	CFI_MOVQ rdx, RDX+8
+	CFI_MOVQ rcx, RCX+8
+	CFI_MOVQ rax, RAX+8
+	CFI_MOVQ r8, R8+8
+	CFI_MOVQ r9, R9+8
+	CFI_MOVQ r10, R10+8
+	CFI_MOVQ r11, R11+8
+	CFI_MOVQ rbx, RBX+8
+	CFI_MOVQ rbp, RBP+8
+	CFI_MOVQ r12, R12+8
+	CFI_MOVQ r13, R13+8
+	CFI_MOVQ r14, R14+8
+	CFI_MOVQ r15, R15+8
 	xorl %ebx,%ebx
 	testl $3,CS+8(%rsp)
 	je error_kernelspace
@@ -1146,7 +1151,7 @@ KPROBE_END(error_entry)
 
 /* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
 KPROBE_ENTRY(error_exit)
-	_frame R15
+	DEFAULT_FRAME
 	movl %ebx,%eax
 	RESTORE_REST
 	DISABLE_INTERRUPTS(CLBR_NONE)
@@ -1455,7 +1460,7 @@ ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
    see the correct pointer to the pt_regs */
 	movq %rdi, %rsp            # we don't return, adjust the stack frame
 	CFI_ENDPROC
-	CFI_DEFAULT_STACK
+	DEFAULT_FRAME
 11:	incl %gs:pda_irqcount
 	movq %rsp,%rbp
 	CFI_DEF_CFA_REGISTER rbp
@@ -1483,10 +1488,13 @@ END(do_hypervisor_callback)
 # with its current contents: any discrepancy means we in category 1.
 */
 ENTRY(xen_failsafe_callback)
-	framesz = (RIP-0x30)	/* workaround buggy gas */
-	_frame framesz
-	CFI_REL_OFFSET rcx, 0
-	CFI_REL_OFFSET r11, 8
+	INTR_FRAME 1 (6*8)
+	/*CFI_REL_OFFSET gs,GS*/
+	/*CFI_REL_OFFSET fs,FS*/
+	/*CFI_REL_OFFSET es,ES*/
+	/*CFI_REL_OFFSET ds,DS*/
+	CFI_REL_OFFSET r11,8
+	CFI_REL_OFFSET rcx,0
 	movw %ds,%cx
 	cmpw %cx,0x10(%rsp)
 	CFI_REMEMBER_STATE
@@ -1507,12 +1515,9 @@ ENTRY(xen_failsafe_callback)
 	CFI_RESTORE r11
 	addq $0x30,%rsp
 	CFI_ADJUST_CFA_OFFSET -0x30
-	pushq $0
-	CFI_ADJUST_CFA_OFFSET 8
-	pushq %r11
-	CFI_ADJUST_CFA_OFFSET 8
-	pushq %rcx
-	CFI_ADJUST_CFA_OFFSET 8
+	CFI_PUSHQ $0	/* RIP */
+	CFI_PUSHQ %r11
+	CFI_PUSHQ %rcx
 	jmp general_protection
 	CFI_RESTORE_STATE
 1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
@@ -1522,8 +1527,7 @@ ENTRY(xen_failsafe_callback)
 	CFI_RESTORE r11
 	addq $0x30,%rsp
 	CFI_ADJUST_CFA_OFFSET -0x30
-	pushq $0
-	CFI_ADJUST_CFA_OFFSET 8
+	CFI_PUSHQ $0
 	SAVE_ALL
 	jmp error_exit
 	CFI_ENDPROC
-- 
cgit v1.2.3-70-g09d2


From 3ddd972d970fdabbe6515aa2f95e0ef2c8df903d Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Thu, 20 Nov 2008 18:32:17 -0800
Subject: x86: signal: rename COPY_SEG_STRICT to COPY_SEG_CPL3

Impact: cleanup

Rename macro COPY_SEG_STRICT to COPY_SEG_CPL3, as suggested by hpa.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 8 ++++----
 arch/x86/kernel/signal_64.c | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 514171ac0d03..c2aabeba27a5 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -145,7 +145,7 @@ asmlinkage int sys_sigaltstack(unsigned long bx)
 		regs->seg = tmp;			\
 }
 
-#define COPY_SEG_STRICT(seg)	{			\
+#define COPY_SEG_CPL3(seg)	{			\
 		unsigned short tmp;			\
 		err |= __get_user(tmp, &sc->seg);	\
 		regs->seg = tmp | 3;			\
@@ -193,13 +193,13 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 #endif /* CONFIG_X86_64 */
 
 #ifdef CONFIG_X86_32
-	COPY_SEG_STRICT(cs);
-	COPY_SEG_STRICT(ss);
+	COPY_SEG_CPL3(cs);
+	COPY_SEG_CPL3(ss);
 #else /* !CONFIG_X86_32 */
 	/* Kernel saves and restores only the CS segment register on signals,
 	 * which is the bare minimum needed to allow mixed 32/64-bit code.
 	 * App's signal handler can save/restore other segments if needed. */
-	COPY_SEG_STRICT(cs);
+	COPY_SEG_CPL3(cs);
 #endif /* CONFIG_X86_32 */
 
 	err |= __get_user(tmpflags, &sc->flags);
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index d2307e41fbdb..3d54d366ccb2 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -56,7 +56,7 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 	err |= __get_user(regs->x, &sc->x);	\
 }
 
-#define COPY_SEG_STRICT(seg)	{			\
+#define COPY_SEG_CPL3(seg)	{			\
 		unsigned short tmp;			\
 		err |= __get_user(tmp, &sc->seg);	\
 		regs->seg = tmp | 3;			\
@@ -98,13 +98,13 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 #endif /* CONFIG_X86_64 */
 
 #ifdef CONFIG_X86_32
-	COPY_SEG_STRICT(cs);
-	COPY_SEG_STRICT(ss);
+	COPY_SEG_CPL3(cs);
+	COPY_SEG_CPL3(ss);
 #else /* !CONFIG_X86_32 */
 	/* Kernel saves and restores only the CS segment register on signals,
 	 * which is the bare minimum needed to allow mixed 32/64-bit code.
 	 * App's signal handler can save/restore other segments if needed. */
-	COPY_SEG_STRICT(cs);
+	COPY_SEG_CPL3(cs);
 #endif /* CONFIG_X86_32 */
 
 	err |= __get_user(tmpflags, &sc->flags);
-- 
cgit v1.2.3-70-g09d2


From e8a0e27662186f8856a0a6242e7a8386c9a64a53 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 21 Nov 2008 15:11:32 +0100
Subject: x86: clean up after: move entry_64.S register saving out of the
 macros, fix

Impact: build fix

The break builds with older binutils (2.16.1):

 arch/x86/kernel/entry_64.S: Assembler messages:
 arch/x86/kernel/entry_64.S:282: Error: too many positional arguments
 arch/x86/kernel/entry_64.S:283: Error: too many positional arguments
 arch/x86/kernel/entry_64.S:284: Error: too many positional arguments
 arch/x86/kernel/entry_64.S:285: Error: too many positional arguments
 arch/x86/kernel/entry_64.S:286: Error: too many positional arguments
 arch/x86/kernel/entry_64.S:287: Error: too many positional arguments
 arch/x86/kernel/entry_64.S:288: Error: too many positional arguments
 arch/x86/kernel/entry_64.S:289: Error: too many positional arguments
 arch/x86/kernel/entry_64.S:290: Error: too many positional arguments

Took some time to figure out the detail that GAS chokes on: it's
negative offsets. Rearrange the calculations to make sure we never
go negative.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 54 +++++++++++++++++++++++-----------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 7a04f696121d..4e3d83678f85 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -247,12 +247,12 @@ ENTRY(native_usergs_sysret64)
  * initial frame state for interrupts (and exceptions without error code)
  */
 	.macro INTR_FRAME start=1 offset=0
-	EMPTY_FRAME \start, (SS+8-RIP)+\offset
-	/*CFI_REL_OFFSET ss, SS-RIP+\offset*/
-	CFI_REL_OFFSET rsp, RSP-RIP+\offset
-	/*CFI_REL_OFFSET rflags, EFLAGS-RIP+\offset*/
-	/*CFI_REL_OFFSET cs, CS-RIP+\offset*/
-	CFI_REL_OFFSET rip, RIP-RIP+\offset
+	EMPTY_FRAME \start, SS+8+\offset-RIP
+	/*CFI_REL_OFFSET ss, SS+\offset-RIP*/
+	CFI_REL_OFFSET rsp, RSP+\offset-RIP
+	/*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
+	/*CFI_REL_OFFSET cs, CS+\offset-RIP*/
+	CFI_REL_OFFSET rip, RIP+\offset-RIP
 	.endm
 
 /*
@@ -260,7 +260,7 @@ ENTRY(native_usergs_sysret64)
  * with vector already pushed)
  */
 	.macro XCPT_FRAME start=1 offset=0
-	INTR_FRAME \start, (RIP-ORIG_RAX)+\offset
+	INTR_FRAME \start, RIP+\offset-ORIG_RAX
 	/*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
 	.endm
 
@@ -268,23 +268,23 @@ ENTRY(native_usergs_sysret64)
  * frame that enables calling into C.
  */
 	.macro PARTIAL_FRAME start=1 offset=0
-	XCPT_FRAME \start, (ORIG_RAX-ARGOFFSET)+\offset
-	CFI_REL_OFFSET rdi, (RDI-ARGOFFSET)+\offset
-	CFI_REL_OFFSET rsi, (RSI-ARGOFFSET)+\offset
-	CFI_REL_OFFSET rdx, (RDX-ARGOFFSET)+\offset
-	CFI_REL_OFFSET rcx, (RCX-ARGOFFSET)+\offset
-	CFI_REL_OFFSET rax, (RAX-ARGOFFSET)+\offset
-	CFI_REL_OFFSET r8, (R8-ARGOFFSET)+\offset
-	CFI_REL_OFFSET r9, (R9-ARGOFFSET)+\offset
-	CFI_REL_OFFSET r10, (R10-ARGOFFSET)+\offset
-	CFI_REL_OFFSET r11, (R11-ARGOFFSET)+\offset
+	XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
+	CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
+	CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
+	CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
+	CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
+	CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
+	CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
+	CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
+	CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
+	CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
 	.endm
 
 /*
  * frame that enables passing a complete pt_regs to a C function.
  */
 	.macro DEFAULT_FRAME start=1 offset=0
-	PARTIAL_FRAME \start, (R11-R15)+\offset
+	PARTIAL_FRAME \start, R11+\offset-R15
 	CFI_REL_OFFSET rbx, RBX+\offset
 	CFI_REL_OFFSET rbp, RBP+\offset
 	CFI_REL_OFFSET r12, R12+\offset
@@ -297,15 +297,15 @@ ENTRY(native_usergs_sysret64)
 ENTRY(save_args)
 	XCPT_FRAME
 	cld
-	CFI_MOVQ rdi, (RDI-ARGOFFSET)+16
-	CFI_MOVQ rsi, (RSI-ARGOFFSET)+16
-	CFI_MOVQ rdx, (RDX-ARGOFFSET)+16
-	CFI_MOVQ rcx, (RCX-ARGOFFSET)+16
-	CFI_MOVQ rax, (RAX-ARGOFFSET)+16
-	CFI_MOVQ r8, (R8-ARGOFFSET)+16
-	CFI_MOVQ r9, (R9-ARGOFFSET)+16
-	CFI_MOVQ r10, (R10-ARGOFFSET)+16
-	CFI_MOVQ r11, (R11-ARGOFFSET)+16
+	CFI_MOVQ rdi, RDI+16-ARGOFFSET
+	CFI_MOVQ rsi, RSI+16-ARGOFFSET
+	CFI_MOVQ rdx, RDX+16-ARGOFFSET
+	CFI_MOVQ rcx, RCX+16-ARGOFFSET
+	CFI_MOVQ rax, RAX+16-ARGOFFSET
+	CFI_MOVQ r8, R8+16-ARGOFFSET
+	CFI_MOVQ r9, R9+16-ARGOFFSET
+	CFI_MOVQ r10, R10+16-ARGOFFSET
+	CFI_MOVQ r11, R11+16-ARGOFFSET
 	leaq -ARGOFFSET+16(%rsp),%rdi	/* arg1 for handler */
 	CFI_MOVQ rbp, 8		/* push %rbp */
 	leaq 8(%rsp), %rbp		/* mov %rsp, %ebp */
-- 
cgit v1.2.3-70-g09d2


From 14ae22ba2b8bb3d53fb795f9b8074aa39ef7b6cd Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 21 Nov 2008 15:20:47 +0100
Subject: x86: entry_64.S: rename

Impact: cleanup

Rename:

   CFI_PUSHQ  =>  pushq_cfi
   CFI_POPQ   =>  popq_cfi
   CFI_MOVQ   =>  movq_cfi

To make it blend better into regular assembly code.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 71 +++++++++++++++++++++++-----------------------
 1 file changed, 36 insertions(+), 35 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 4e3d83678f85..92c5e18340db 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -63,17 +63,17 @@
 /*
  * Some macro's to hide the most frequently occuring CFI annotations.
  */
-	.macro CFI_PUSHQ reg
+	.macro pushq_cfi reg
 	pushq \reg
 	CFI_ADJUST_CFA_OFFSET 8
 	.endm
 
-	.macro CFI_POPQ reg
+	.macro popq_cfi reg
 	popq \reg
 	CFI_ADJUST_CFA_OFFSET -8
 	.endm
 
-	.macro CFI_MOVQ reg offset=0
+	.macro movq_cfi reg offset=0
 	movq %\reg, \offset(%rsp)
 	CFI_REL_OFFSET \reg, \offset
 	.endm
@@ -297,17 +297,18 @@ ENTRY(native_usergs_sysret64)
 ENTRY(save_args)
 	XCPT_FRAME
 	cld
-	CFI_MOVQ rdi, RDI+16-ARGOFFSET
-	CFI_MOVQ rsi, RSI+16-ARGOFFSET
-	CFI_MOVQ rdx, RDX+16-ARGOFFSET
-	CFI_MOVQ rcx, RCX+16-ARGOFFSET
-	CFI_MOVQ rax, RAX+16-ARGOFFSET
-	CFI_MOVQ r8, R8+16-ARGOFFSET
-	CFI_MOVQ r9, R9+16-ARGOFFSET
-	CFI_MOVQ r10, R10+16-ARGOFFSET
-	CFI_MOVQ r11, R11+16-ARGOFFSET
+	movq_cfi rdi, RDI+16-ARGOFFSET
+	movq_cfi rsi, RSI+16-ARGOFFSET
+	movq_cfi rdx, RDX+16-ARGOFFSET
+	movq_cfi rcx, RCX+16-ARGOFFSET
+	movq_cfi rax, RAX+16-ARGOFFSET
+	movq_cfi  r8,  R8+16-ARGOFFSET
+	movq_cfi  r9,  R9+16-ARGOFFSET
+	movq_cfi r10, R10+16-ARGOFFSET
+	movq_cfi r11, R11+16-ARGOFFSET
+
 	leaq -ARGOFFSET+16(%rsp),%rdi	/* arg1 for handler */
-	CFI_MOVQ rbp, 8		/* push %rbp */
+	movq_cfi rbp, 8		/* push %rbp */
 	leaq 8(%rsp), %rbp		/* mov %rsp, %ebp */
 	testl $3, CS(%rdi)
 	je 1f
@@ -320,10 +321,10 @@ ENTRY(save_args)
 	 */
 1:	incl %gs:pda_irqcount
 	jne 2f
-	CFI_POPQ %rax			/* move return address... */
+	popq_cfi %rax			/* move return address... */
 	mov %gs:pda_irqstackptr,%rsp
 	EMPTY_FRAME 0
-	CFI_PUSHQ %rax			/* ... to the new stack */
+	pushq_cfi %rax			/* ... to the new stack */
 	/*
 	 * We entered an interrupt context - irqs are off:
 	 */
@@ -968,7 +969,7 @@ END(spurious_interrupt)
 	.macro zeroentry sym
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	CFI_PUSHQ $-1		/* ORIG_RAX: no syscall to restart */
+	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
 	subq $15*8,%rsp
 	CFI_ADJUST_CFA_OFFSET 15*8
 	call error_entry
@@ -1103,21 +1104,21 @@ KPROBE_ENTRY(error_entry)
 	CFI_ADJUST_CFA_OFFSET 15*8
 	/* oldrax contains error code */
 	cld
-	CFI_MOVQ rdi, RDI+8
-	CFI_MOVQ rsi, RSI+8
-	CFI_MOVQ rdx, RDX+8
-	CFI_MOVQ rcx, RCX+8
-	CFI_MOVQ rax, RAX+8
-	CFI_MOVQ r8, R8+8
-	CFI_MOVQ r9, R9+8
-	CFI_MOVQ r10, R10+8
-	CFI_MOVQ r11, R11+8
-	CFI_MOVQ rbx, RBX+8
-	CFI_MOVQ rbp, RBP+8
-	CFI_MOVQ r12, R12+8
-	CFI_MOVQ r13, R13+8
-	CFI_MOVQ r14, R14+8
-	CFI_MOVQ r15, R15+8
+	movq_cfi rdi, RDI+8
+	movq_cfi rsi, RSI+8
+	movq_cfi rdx, RDX+8
+	movq_cfi rcx, RCX+8
+	movq_cfi rax, RAX+8
+	movq_cfi  r8,  R8+8
+	movq_cfi  r9,  R9+8
+	movq_cfi r10, R10+8
+	movq_cfi r11, R11+8
+	movq_cfi rbx, RBX+8
+	movq_cfi rbp, RBP+8
+	movq_cfi r12, R12+8
+	movq_cfi r13, R13+8
+	movq_cfi r14, R14+8
+	movq_cfi r15, R15+8
 	xorl %ebx,%ebx
 	testl $3,CS+8(%rsp)
 	je error_kernelspace
@@ -1515,9 +1516,9 @@ ENTRY(xen_failsafe_callback)
 	CFI_RESTORE r11
 	addq $0x30,%rsp
 	CFI_ADJUST_CFA_OFFSET -0x30
-	CFI_PUSHQ $0	/* RIP */
-	CFI_PUSHQ %r11
-	CFI_PUSHQ %rcx
+	pushq_cfi $0	/* RIP */
+	pushq_cfi %r11
+	pushq_cfi %rcx
 	jmp general_protection
 	CFI_RESTORE_STATE
 1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
@@ -1527,7 +1528,7 @@ ENTRY(xen_failsafe_callback)
 	CFI_RESTORE r11
 	addq $0x30,%rsp
 	CFI_ADJUST_CFA_OFFSET -0x30
-	CFI_PUSHQ $0
+	pushq_cfi $0
 	SAVE_ALL
 	jmp error_exit
 	CFI_ENDPROC
-- 
cgit v1.2.3-70-g09d2


From c002a1e6b6b6f07ae04e68987054bf1f2150ae48 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Fri, 21 Nov 2008 16:41:55 +0100
Subject: x86: introduce save_rest and restructure the PTREGSCALL macro in
 entry_64.S

Impact: cleanup

The save_rest function completes a partial stack frame for use
by the PTREGSCALL macro. This also avoids the indirect call in
PTREGSCALLs.

This adds the macro movq_cfi_restore to hide the CFI_RESTORE
annotation when restoring a register from the stack frame.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 91 +++++++++++++++++++++++++++-------------------
 1 file changed, 53 insertions(+), 38 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 92c5e18340db..ef95c45b9269 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -78,6 +78,11 @@
 	CFI_REL_OFFSET \reg, \offset
 	.endm
 
+	.macro movq_cfi_restore offset reg
+	movq \offset(%rsp), %\reg
+	CFI_RESTORE \reg
+	.endm
+
 #ifdef CONFIG_FUNCTION_TRACER
 #ifdef CONFIG_DYNAMIC_FTRACE
 ENTRY(mcount)
@@ -186,21 +191,21 @@ ENTRY(native_usergs_sysret64)
  */
 
 	/* %rsp:at FRAMEEND */
-	.macro FIXUP_TOP_OF_STACK tmp
-	movq	%gs:pda_oldrsp,\tmp
-	movq  	\tmp,RSP(%rsp)
-	movq    $__USER_DS,SS(%rsp)
-	movq    $__USER_CS,CS(%rsp)
-	movq 	$-1,RCX(%rsp)
-	movq	R11(%rsp),\tmp  /* get eflags */
-	movq	\tmp,EFLAGS(%rsp)
+	.macro FIXUP_TOP_OF_STACK tmp offset=0
+	movq %gs:pda_oldrsp,\tmp
+	movq \tmp,RSP+\offset(%rsp)
+	movq $__USER_DS,SS+\offset(%rsp)
+	movq $__USER_CS,CS+\offset(%rsp)
+	movq $-1,RCX+\offset(%rsp)
+	movq R11+\offset(%rsp),\tmp  /* get eflags */
+	movq \tmp,EFLAGS+\offset(%rsp)
 	.endm
 
-	.macro RESTORE_TOP_OF_STACK tmp,offset=0
-	movq   RSP-\offset(%rsp),\tmp
-	movq   \tmp,%gs:pda_oldrsp
-	movq   EFLAGS-\offset(%rsp),\tmp
-	movq   \tmp,R11-\offset(%rsp)
+	.macro RESTORE_TOP_OF_STACK tmp offset=0
+	movq RSP+\offset(%rsp),\tmp
+	movq \tmp,%gs:pda_oldrsp
+	movq EFLAGS+\offset(%rsp),\tmp
+	movq \tmp,R11+\offset(%rsp)
 	.endm
 
 	.macro FAKE_STACK_FRAME child_rip
@@ -333,6 +338,21 @@ ENTRY(save_args)
 	CFI_ENDPROC
 END(save_args)
 
+ENTRY(save_rest)
+	PARTIAL_FRAME 1 REST_SKIP+8
+	movq 5*8+16(%rsp), %r11	/* save return address */
+	movq_cfi rbx, RBX+16
+	movq_cfi rbp, RBP+16
+	movq_cfi r12, R12+16
+	movq_cfi r13, R13+16
+	movq_cfi r14, R14+16
+	movq_cfi r15, R15+16
+	movq %r11, 8(%rsp)	/* return address */
+	FIXUP_TOP_OF_STACK %r11, 16
+	ret
+	CFI_ENDPROC
+END(save_rest)
+
 /*
  * A newly forked process directly context switches into this.
  */
@@ -353,7 +373,7 @@ rff_action:
 	je   int_ret_from_sys_call
 	testl $_TIF_IA32,TI_flags(%rcx)
 	jnz  int_ret_from_sys_call
-	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
+	RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
 	jmp ret_from_sys_call
 rff_trace:
 	movq %rsp,%rdi
@@ -626,18 +646,20 @@ END(system_call)
 /*
  * Certain special system calls that need to save a complete full stack frame.
  */
-
 	.macro PTREGSCALL label,func,arg
-	.globl \label
-\label:
-	leaq	\func(%rip),%rax
-	leaq    -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
-	jmp	ptregscall_common
+ENTRY(\label)
+	PARTIAL_FRAME 1 8		/* offset 8: return address */
+	subq $REST_SKIP, %rsp
+	CFI_ADJUST_CFA_OFFSET REST_SKIP
+	call save_rest
+	DEFAULT_FRAME 0 8		/* offset 8: return address */
+	leaq 8(%rsp), \arg	/* pt_regs pointer */
+	call \func
+	jmp ptregscall_common
+	CFI_ENDPROC
 END(\label)
 	.endm
 
-	CFI_STARTPROC
-
 	PTREGSCALL stub_clone, sys_clone, %r8
 	PTREGSCALL stub_fork, sys_fork, %rdi
 	PTREGSCALL stub_vfork, sys_vfork, %rdi
@@ -645,22 +667,15 @@ END(\label)
 	PTREGSCALL stub_iopl, sys_iopl, %rsi
 
 ENTRY(ptregscall_common)
-	popq %r11
-	CFI_ADJUST_CFA_OFFSET -8
-	CFI_REGISTER rip, r11
-	SAVE_REST
-	movq %r11, %r15
-	CFI_REGISTER rip, r15
-	FIXUP_TOP_OF_STACK %r11
-	call *%rax
-	RESTORE_TOP_OF_STACK %r11
-	movq %r15, %r11
-	CFI_REGISTER rip, r11
-	RESTORE_REST
-	pushq %r11
-	CFI_ADJUST_CFA_OFFSET 8
-	CFI_REL_OFFSET rip, 0
-	ret
+	DEFAULT_FRAME 1 8	/* offset 8: return address */
+	RESTORE_TOP_OF_STACK %r11, 8
+	movq_cfi_restore R15+8, r15
+	movq_cfi_restore R14+8, r14
+	movq_cfi_restore R13+8, r13
+	movq_cfi_restore R12+8, r12
+	movq_cfi_restore RBP+8, rbp
+	movq_cfi_restore RBX+8, rbx
+	ret $REST_SKIP		/* pop extended registers */
 	CFI_ENDPROC
 END(ptregscall_common)
 
-- 
cgit v1.2.3-70-g09d2


From e2f6bc25b98dbb10d809ee50262b43fcae67840a Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Fri, 21 Nov 2008 16:43:18 +0100
Subject: x86: entry_64.S: factor out save_paranoid and paranoid_exit

Impact: cleanup, shrink kernel image size

Also expand the paranoid_exit0 macro into nmi_exit inside the
nmi stub in the case of enabled irq-tracing.

This gives a few hundred bytes code size reduction.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 151 ++++++++++++++++++++++++++++++---------------
 1 file changed, 102 insertions(+), 49 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index ef95c45b9269..fad777b11366 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -353,6 +353,36 @@ ENTRY(save_rest)
 	CFI_ENDPROC
 END(save_rest)
 
+/* save complete stack frame */
+ENTRY(save_paranoid)
+	XCPT_FRAME 1 RDI+8
+	cld
+	movq_cfi rdi, RDI+8
+	movq_cfi rsi, RSI+8
+	movq_cfi rdx, RDX+8
+	movq_cfi rcx, RCX+8
+	movq_cfi rax, RAX+8
+	movq_cfi r8, R8+8
+	movq_cfi r9, R9+8
+	movq_cfi r10, R10+8
+	movq_cfi r11, R11+8
+	movq_cfi rbx, RBX+8
+	movq_cfi rbp, RBP+8
+	movq_cfi r12, R12+8
+	movq_cfi r13, R13+8
+	movq_cfi r14, R14+8
+	movq_cfi r15, R15+8
+	movl $1,%ebx
+	movl $MSR_GS_BASE,%ecx
+	rdmsr
+	testl %edx,%edx
+	js 1f	/* negative -> in kernel */
+	SWAPGS
+	xorl %ebx,%ebx
+1:	ret
+	CFI_ENDPROC
+END(save_paranoid)
+
 /*
  * A newly forked process directly context switches into this.
  */
@@ -1012,24 +1042,15 @@ END(spurious_interrupt)
 	.endm
 
 	/* error code is on the stack already */
-	/* handle NMI like exceptions that can happen everywhere */
-	.macro paranoidentry sym, ist=0, irqtrace=1
-	SAVE_ALL
-	cld
-	movl $1,%ebx
-	movl  $MSR_GS_BASE,%ecx
-	rdmsr
-	testl %edx,%edx
-	js    1f
-	SWAPGS
-	xorl  %ebx,%ebx
-1:
+	.macro paranoidentry sym ist=0
+	subq $15*8, %rsp
+	CFI_ADJUST_CFA_OFFSET 15*8
+	call save_paranoid
+	DEFAULT_FRAME 0
 	.if \ist
 	movq	%gs:pda_data_offset, %rbp
 	.endif
-	.if \irqtrace
 	TRACE_IRQS_OFF
-	.endif
 	movq %rsp,%rdi
 	movq ORIG_RAX(%rsp),%rsi
 	movq $-1,ORIG_RAX(%rsp)
@@ -1041,9 +1062,7 @@ END(spurious_interrupt)
 	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
 	.endif
 	DISABLE_INTERRUPTS(CLBR_NONE)
-	.if \irqtrace
 	TRACE_IRQS_OFF
-	.endif
 	.endm
 
 	/*
@@ -1058,57 +1077,48 @@ END(spurious_interrupt)
 	 * is fundamentally NMI-unsafe. (we cannot change the soft and
 	 * hard flags at once, atomically)
 	 */
-	.macro paranoidexit trace=1
+
 	/* ebx:	no swapgs flag */
-paranoid_exit\trace:
+KPROBE_ENTRY(paranoid_exit)
+	INTR_FRAME
 	testl %ebx,%ebx				/* swapgs needed? */
-	jnz paranoid_restore\trace
+	jnz paranoid_restore
 	testl $3,CS(%rsp)
-	jnz   paranoid_userspace\trace
-paranoid_swapgs\trace:
-	.if \trace
+	jnz   paranoid_userspace
+paranoid_swapgs:
 	TRACE_IRQS_IRETQ 0
-	.endif
 	SWAPGS_UNSAFE_STACK
-paranoid_restore\trace:
+paranoid_restore:
 	RESTORE_ALL 8
 	jmp irq_return
-paranoid_userspace\trace:
+paranoid_userspace:
 	GET_THREAD_INFO(%rcx)
 	movl TI_flags(%rcx),%ebx
 	andl $_TIF_WORK_MASK,%ebx
-	jz paranoid_swapgs\trace
+	jz paranoid_swapgs
 	movq %rsp,%rdi			/* &pt_regs */
 	call sync_regs
 	movq %rax,%rsp			/* switch stack for scheduling */
 	testl $_TIF_NEED_RESCHED,%ebx
-	jnz paranoid_schedule\trace
+	jnz paranoid_schedule
 	movl %ebx,%edx			/* arg3: thread flags */
-	.if \trace
 	TRACE_IRQS_ON
-	.endif
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	xorl %esi,%esi 			/* arg2: oldset */
 	movq %rsp,%rdi 			/* arg1: &pt_regs */
 	call do_notify_resume
 	DISABLE_INTERRUPTS(CLBR_NONE)
-	.if \trace
 	TRACE_IRQS_OFF
-	.endif
-	jmp paranoid_userspace\trace
-paranoid_schedule\trace:
-	.if \trace
+	jmp paranoid_userspace
+paranoid_schedule:
 	TRACE_IRQS_ON
-	.endif
 	ENABLE_INTERRUPTS(CLBR_ANY)
 	call schedule
 	DISABLE_INTERRUPTS(CLBR_ANY)
-	.if \trace
 	TRACE_IRQS_OFF
-	.endif
-	jmp paranoid_userspace\trace
+	jmp paranoid_userspace
 	CFI_ENDPROC
-	.endm
+END(paranoid_exit)
 
 /*
  * Exception entry point. This expects an error code/orig_rax on the stack.
@@ -1326,20 +1336,63 @@ KPROBE_ENTRY(debug)
 	pushq $0
 	CFI_ADJUST_CFA_OFFSET 8
 	paranoidentry do_debug, DEBUG_STACK
-	paranoidexit
+	jmp paranoid_exit
+ 	CFI_ENDPROC
 KPROBE_END(debug)
 
 	/* runs on exception stack */
 KPROBE_ENTRY(nmi)
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	pushq $-1
-	CFI_ADJUST_CFA_OFFSET 8
-	paranoidentry do_nmi, 0, 0
+	pushq_cfi $-1
+	subq $15*8, %rsp
+	CFI_ADJUST_CFA_OFFSET 15*8
+	call save_paranoid
+	DEFAULT_FRAME 0
+	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+	movq %rsp,%rdi
+	movq ORIG_RAX(%rsp),%rsi
+	movq $-1,ORIG_RAX(%rsp)
+	call do_nmi
+	DISABLE_INTERRUPTS(CLBR_NONE)
 #ifdef CONFIG_TRACE_IRQFLAGS
-	paranoidexit 0
+	/* paranoidexit; without TRACE_IRQS_OFF */
+	/* ebx:	no swapgs flag */
+nmi_exit:
+	testl %ebx,%ebx				/* swapgs needed? */
+	jnz nmi_restore
+	testl $3,CS(%rsp)
+	jnz nmi_userspace
+nmi_swapgs:
+	SWAPGS_UNSAFE_STACK
+nmi_restore:
+	RESTORE_ALL 8
+	jmp irq_return
+nmi_userspace:
+	GET_THREAD_INFO(%rcx)
+	movl TI_flags(%rcx),%ebx
+	andl $_TIF_WORK_MASK,%ebx
+	jz nmi_swapgs
+	movq %rsp,%rdi			/* &pt_regs */
+	call sync_regs
+	movq %rax,%rsp			/* switch stack for scheduling */
+	testl $_TIF_NEED_RESCHED,%ebx
+	jnz nmi_schedule
+	movl %ebx,%edx			/* arg3: thread flags */
+	ENABLE_INTERRUPTS(CLBR_NONE)
+	xorl %esi,%esi 			/* arg2: oldset */
+	movq %rsp,%rdi 			/* arg1: &pt_regs */
+	call do_notify_resume
+	DISABLE_INTERRUPTS(CLBR_NONE)
+	jmp nmi_userspace
+nmi_schedule:
+	ENABLE_INTERRUPTS(CLBR_ANY)
+	call schedule
+	DISABLE_INTERRUPTS(CLBR_ANY)
+	jmp nmi_userspace
+	CFI_ENDPROC
 #else
-	jmp paranoid_exit1
+	jmp paranoid_exit
  	CFI_ENDPROC
 #endif
 KPROBE_END(nmi)
@@ -1350,7 +1403,7 @@ KPROBE_ENTRY(int3)
  	pushq $0
  	CFI_ADJUST_CFA_OFFSET 8
  	paranoidentry do_int3, DEBUG_STACK
- 	jmp paranoid_exit1
+ 	jmp paranoid_exit
  	CFI_ENDPROC
 KPROBE_END(int3)
 
@@ -1375,7 +1428,7 @@ ENTRY(double_fault)
 	XCPT_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	paranoidentry do_double_fault
-	jmp paranoid_exit1
+	jmp paranoid_exit
 	CFI_ENDPROC
 END(double_fault)
 
@@ -1392,7 +1445,7 @@ ENTRY(stack_segment)
 	XCPT_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	paranoidentry do_stack_segment
-	jmp paranoid_exit1
+	jmp paranoid_exit
 	CFI_ENDPROC
 END(stack_segment)
 
@@ -1420,7 +1473,7 @@ ENTRY(machine_check)
 	pushq $0
 	CFI_ADJUST_CFA_OFFSET 8
 	paranoidentry do_machine_check
-	jmp paranoid_exit1
+	jmp paranoid_exit
 	CFI_ENDPROC
 END(machine_check)
 #endif
-- 
cgit v1.2.3-70-g09d2


From b8b1d08bf6fe7c09e6cb2294bc0e5e964b361241 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Fri, 21 Nov 2008 16:44:28 +0100
Subject: x86: entry_64.S: split out some macro's and move common code to
 paranoid_exit

Impact: cleanup

DISABLE_INTERRUPTS(CLBR_NONE)/TRACE_IRQS_OFF is now always
executed just before paranoid_exit. Move it there.

Split out paranoidzeroentry, paranoiderrorentry, and
paranoidzeroentry_ist to get more readable macro's.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 102 ++++++++++++++++++++++-----------------------
 1 file changed, 51 insertions(+), 51 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index fad777b11366..692c1da61905 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1026,6 +1026,39 @@ END(spurious_interrupt)
 	CFI_ENDPROC
 	.endm
 
+	.macro paranoidzeroentry sym
+	INTR_FRAME
+	PARAVIRT_ADJUST_EXCEPTION_FRAME
+	pushq $-1		/* ORIG_RAX: no syscall to restart */
+	CFI_ADJUST_CFA_OFFSET 8
+	subq $15*8, %rsp
+	call save_paranoid
+	TRACE_IRQS_OFF
+	movq %rsp,%rdi		/* pt_regs pointer */
+	xorl %esi,%esi		/* no error code */
+	call \sym
+	jmp paranoid_exit	/* %ebx: no swapgs flag */
+	CFI_ENDPROC
+	.endm
+
+	.macro paranoidzeroentry_ist sym ist
+ 	INTR_FRAME
+	PARAVIRT_ADJUST_EXCEPTION_FRAME
+	pushq $-1		/* ORIG_RAX: no syscall to restart */
+	CFI_ADJUST_CFA_OFFSET 8
+	subq $15*8, %rsp
+	call save_paranoid
+	TRACE_IRQS_OFF
+	movq %rsp,%rdi		/* pt_regs pointer */
+	xorl %esi,%esi		/* no error code */
+	movq %gs:pda_data_offset, %rbp
+	subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+	call \sym
+	addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+	jmp paranoid_exit	/* %ebx: no swapgs flag */
+	CFI_ENDPROC
+	.endm
+
 	.macro errorentry sym
 	XCPT_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
@@ -1042,27 +1075,20 @@ END(spurious_interrupt)
 	.endm
 
 	/* error code is on the stack already */
-	.macro paranoidentry sym ist=0
-	subq $15*8, %rsp
+	.macro paranoiderrorentry sym
+	XCPT_FRAME
+	PARAVIRT_ADJUST_EXCEPTION_FRAME
+	subq $15*8,%rsp
 	CFI_ADJUST_CFA_OFFSET 15*8
 	call save_paranoid
 	DEFAULT_FRAME 0
-	.if \ist
-	movq	%gs:pda_data_offset, %rbp
-	.endif
 	TRACE_IRQS_OFF
-	movq %rsp,%rdi
-	movq ORIG_RAX(%rsp),%rsi
-	movq $-1,ORIG_RAX(%rsp)
-	.if \ist
-	subq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
-	.endif
+	movq %rsp,%rdi			/* pt_regs pointer */
+	movq ORIG_RAX(%rsp),%rsi	/* get error code */
+	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
 	call \sym
-	.if \ist
-	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
-	.endif
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
+	jmp paranoid_exit		/* %ebx: no swapgs flag */
+	CFI_ENDPROC
 	.endm
 
 	/*
@@ -1081,6 +1107,8 @@ END(spurious_interrupt)
 	/* ebx:	no swapgs flag */
 KPROBE_ENTRY(paranoid_exit)
 	INTR_FRAME
+	DISABLE_INTERRUPTS(CLBR_NONE)
+	TRACE_IRQS_OFF
 	testl %ebx,%ebx				/* swapgs needed? */
 	jnz paranoid_restore
 	testl $3,CS(%rsp)
@@ -1331,13 +1359,7 @@ END(device_not_available)
 
 	/* runs on exception stack */
 KPROBE_ENTRY(debug)
- 	INTR_FRAME
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	pushq $0
-	CFI_ADJUST_CFA_OFFSET 8
-	paranoidentry do_debug, DEBUG_STACK
-	jmp paranoid_exit
- 	CFI_ENDPROC
+	paranoidzeroentry_ist do_debug, DEBUG_STACK
 KPROBE_END(debug)
 
 	/* runs on exception stack */
@@ -1351,14 +1373,12 @@ KPROBE_ENTRY(nmi)
 	DEFAULT_FRAME 0
 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 	movq %rsp,%rdi
-	movq ORIG_RAX(%rsp),%rsi
-	movq $-1,ORIG_RAX(%rsp)
+	movq $-1,%rsi
 	call do_nmi
-	DISABLE_INTERRUPTS(CLBR_NONE)
 #ifdef CONFIG_TRACE_IRQFLAGS
 	/* paranoidexit; without TRACE_IRQS_OFF */
 	/* ebx:	no swapgs flag */
-nmi_exit:
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	testl %ebx,%ebx				/* swapgs needed? */
 	jnz nmi_restore
 	testl $3,CS(%rsp)
@@ -1398,13 +1418,7 @@ nmi_schedule:
 KPROBE_END(nmi)
 
 KPROBE_ENTRY(int3)
- 	INTR_FRAME
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
- 	pushq $0
- 	CFI_ADJUST_CFA_OFFSET 8
- 	paranoidentry do_int3, DEBUG_STACK
- 	jmp paranoid_exit
- 	CFI_ENDPROC
+	paranoidzeroentry_ist do_int3, DEBUG_STACK
 KPROBE_END(int3)
 
 ENTRY(overflow)
@@ -1425,11 +1439,7 @@ END(coprocessor_segment_overrun)
 
 	/* runs on exception stack */
 ENTRY(double_fault)
-	XCPT_FRAME
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	paranoidentry do_double_fault
-	jmp paranoid_exit
-	CFI_ENDPROC
+	paranoiderrorentry do_double_fault
 END(double_fault)
 
 ENTRY(invalid_TSS)
@@ -1442,11 +1452,7 @@ END(segment_not_present)
 
 	/* runs on exception stack */
 ENTRY(stack_segment)
-	XCPT_FRAME
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	paranoidentry do_stack_segment
-	jmp paranoid_exit
-	CFI_ENDPROC
+	paranoiderrorentry do_stack_segment
 END(stack_segment)
 
 KPROBE_ENTRY(general_protection)
@@ -1468,13 +1474,7 @@ END(spurious_interrupt_bug)
 #ifdef CONFIG_X86_MCE
 	/* runs on exception stack */
 ENTRY(machine_check)
-	INTR_FRAME
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	pushq $0
-	CFI_ADJUST_CFA_OFFSET 8
-	paranoidentry do_machine_check
-	jmp paranoid_exit
-	CFI_ENDPROC
+	paranoidzeroentry do_machine_check
 END(machine_check)
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From c81084114f6ff957bc6b5a0048350479c1c1f7b3 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Fri, 21 Nov 2008 22:59:52 +0100
Subject: x86: split out some macro's and move common code to paranoid_exit,
 fix

Impact: fix bootup crash

Even though it tested fine for me, there was still a bug in the
first patch: I have overlooked a call to ptregscall_common. This
patch fixes that, I think, but the code is never executed for
me while running a debian install... (I tested this by putting
an "1:jmp 1b" in there.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 692c1da61905..e5ddf573ded2 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -529,10 +529,13 @@ sysret_signal:
 	jc sysret_audit
 #endif
 	/* edx:	work flags (arg3) */
-	leaq do_notify_resume(%rip),%rax
 	leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
 	xorl %esi,%esi # oldset -> arg2
-	call ptregscall_common
+	SAVE_REST
+	FIXUP_TOP_OF_STACK %r11
+	call do_notify_resume
+	RESTORE_TOP_OF_STACK %r11
+	RESTORE_REST
 	movl $_TIF_WORK_MASK,%edi
 	/* Use IRET because user could have changed frame. This
 	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
-- 
cgit v1.2.3-70-g09d2


From 3889d0cea2b73049bdca062d9ff1e5d33468289c Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Sat, 22 Nov 2008 23:39:23 -0800
Subject: x86: revert default reboot method to REBOOT_KBD

Impact: Reverts default reboot method.

Checkin 14d7ca5c575853664d8fe4f225a77b8df1b7de7d changed the default
reboot method to "pci", a.k.a. port CF9.  Unfortunately this has been
shown to cause lockups on at least two systems for which REBOOT_KBD
worked, both Thinkpads with Intel chipsets.  This reverts the default
to REBOOT_KBD, while leaving the option to have "reboot=pci" specified
explicitly or via a DMI match.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/reboot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index ddc93891cdcd..790b09fbadcb 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -29,7 +29,7 @@ EXPORT_SYMBOL(pm_power_off);
 
 static const struct desc_ptr no_idt = {};
 static int reboot_mode;
-enum reboot_type reboot_type = BOOT_CF9_COND;
+enum reboot_type reboot_type = BOOT_KBD;
 int reboot_force;
 
 #if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
-- 
cgit v1.2.3-70-g09d2


From 3aeb95d5b7839708a8d8e11aa274ee4d0d4042cc Mon Sep 17 00:00:00 2001
From: jia zhang <jia.zhang2008@gmail.com>
Date: Sun, 23 Nov 2008 09:51:41 +0800
Subject: x86_64: fix the check in stack_overflow_check

Impact: make stack overflow debug check and printout narrower

stack_overflow_check() should consider the stack usage of pt_regs, and
thus it could warn us in advance. Additionally, it looks better for
the warning time to start at INITIAL_JIFFIES.

Assuming that rsp gets close to the check point before interrupt
arrives: when interrupt really happens, thread_info will be partly
overrode.

Signed-off-by: jia zhang <jia.zhang2008@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irq_64.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 60eb84eb77a0..b842fc82be15 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -29,11 +29,12 @@
 static inline void stack_overflow_check(struct pt_regs *regs)
 {
 	u64 curbase = (u64)task_stack_page(current);
-	static unsigned long warned = -60*HZ;
+	static unsigned long warned = INITIAL_JIFFIES - 60*HZ;
 
 	if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE &&
-	    regs->sp <  curbase + sizeof(struct thread_info) + 128 &&
-	    time_after(jiffies, warned + 60*HZ)) {
+			regs->sp < curbase + sizeof(struct thread_info) +
+			sizeof(struct pt_regs) + 128 &&
+			time_after(jiffies, warned + 60*HZ)) {
 		printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
 		       current->comm, curbase, regs->sp);
 		show_stack(NULL,NULL);
-- 
cgit v1.2.3-70-g09d2


From f377fa123d0ec621e8e361ecc3f2a8ee70e81a2e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 23 Nov 2008 09:02:26 +0100
Subject: x86: clean up stack overflow debug check

Impact: cleanup

Simplify the irq-sampled stack overflow debug check:

 - eliminate an #idef

 - use WARN_ONCE() to emit a single warning (all bets are off
   after the first such warning anyway)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irq_64.c | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index b842fc82be15..1d3d0e71b044 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,7 +18,6 @@
 #include <asm/idle.h>
 #include <asm/smp.h>
 
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
 /*
  * Probabilistic stack overflow check:
  *
@@ -28,20 +27,18 @@
  */
 static inline void stack_overflow_check(struct pt_regs *regs)
 {
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
 	u64 curbase = (u64)task_stack_page(current);
-	static unsigned long warned = INITIAL_JIFFIES - 60*HZ;
-
-	if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE &&
-			regs->sp < curbase + sizeof(struct thread_info) +
-			sizeof(struct pt_regs) + 128 &&
-			time_after(jiffies, warned + 60*HZ)) {
-		printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
-		       current->comm, curbase, regs->sp);
-		show_stack(NULL,NULL);
-		warned = jiffies;
-	}
-}
+
+	WARN_ONCE(regs->sp >= curbase &&
+		  regs->sp <= curbase + THREAD_SIZE &&
+		  regs->sp <  curbase + sizeof(struct thread_info) +
+					sizeof(struct pt_regs) + 128,
+
+		  "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
+			current->comm, curbase, regs->sp);
 #endif
+}
 
 /*
  * do_IRQ handles all normal device IRQ's (the special
@@ -61,9 +58,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 	irq_enter();
 	irq = __get_cpu_var(vector_irq)[vector];
 
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
 	stack_overflow_check(regs);
-#endif
 
 	desc = irq_to_desc(irq);
 	if (likely(desc))
-- 
cgit v1.2.3-70-g09d2


From 5c9b3a0c7b8be3cdef3d7418f0a49127e7cdc998 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Fri, 21 Nov 2008 17:36:41 -0800
Subject: x86: signal: cosmetic unification of including headers

Impact: cleanup

Make the headers portion of signal_32.c and signal_64.c the same.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 27 +++++++++++++++------------
 arch/x86/kernel/signal_64.c |  7 ++++++-
 2 files changed, 21 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index c2aabeba27a5..0ff8d8750a7d 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -4,29 +4,32 @@
  *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
  *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
  */
-#include <linux/list.h>
 
-#include <linux/personality.h>
-#include <linux/binfmts.h>
-#include <linux/suspend.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
 #include <linux/kernel.h>
-#include <linux/ptrace.h>
 #include <linux/signal.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
 #include <linux/errno.h>
-#include <linux/sched.h>
 #include <linux/wait.h>
+#include <linux/ptrace.h>
 #include <linux/tracehook.h>
-#include <linux/elf.h>
-#include <linux/smp.h>
-#include <linux/mm.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/personality.h>
+#include <linux/uaccess.h>
 
 #include <asm/processor.h>
 #include <asm/ucontext.h>
-#include <asm/uaccess.h>
 #include <asm/i387.h>
 #include <asm/vdso.h>
+
+#ifdef CONFIG_X86_64
+#include <asm/proto.h>
+#include <asm/ia32_unistd.h>
+#include <asm/mce.h>
+#endif /* CONFIG_X86_64 */
+
 #include <asm/syscall.h>
 #include <asm/syscalls.h>
 
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 3d54d366ccb2..c52244ac19fc 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -19,17 +19,22 @@
 #include <linux/unistd.h>
 #include <linux/stddef.h>
 #include <linux/personality.h>
-#include <linux/compiler.h>
 #include <linux/uaccess.h>
 
 #include <asm/processor.h>
 #include <asm/ucontext.h>
 #include <asm/i387.h>
+#include <asm/vdso.h>
+
+#ifdef CONFIG_X86_64
 #include <asm/proto.h>
 #include <asm/ia32_unistd.h>
 #include <asm/mce.h>
+#endif /* CONFIG_X86_64 */
+
 #include <asm/syscall.h>
 #include <asm/syscalls.h>
+
 #include "sigframe.h"
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-- 
cgit v1.2.3-70-g09d2


From 666ac7be049ec290625e65d5922ff59f7bdec527 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Fri, 21 Nov 2008 17:38:25 -0800
Subject: x86: signal: cosmetic unification of sys_sigaltstack()

Impact: cleanup

Add #ifdef directive for unification.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c |  9 +++++++++
 arch/x86/kernel/signal_64.c | 15 +++++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 0ff8d8750a7d..d9909881ac66 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -125,6 +125,7 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
 	return ret;
 }
 
+#ifdef CONFIG_X86_32
 asmlinkage int sys_sigaltstack(unsigned long bx)
 {
 	/*
@@ -137,6 +138,14 @@ asmlinkage int sys_sigaltstack(unsigned long bx)
 
 	return do_sigaltstack(uss, uoss, regs->sp);
 }
+#else /* !CONFIG_X86_32 */
+asmlinkage long
+sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+		struct pt_regs *regs)
+{
+	return do_sigaltstack(uss, uoss, regs->sp);
+}
+#endif /* CONFIG_X86_32 */
 
 #define COPY(x)			{		\
 	err |= __get_user(regs->x, &sc->x);	\
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index c52244ac19fc..b6e4fe03a36b 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -50,12 +50,27 @@
 # define FIX_EFLAGS	__FIX_EFLAGS
 #endif
 
+#ifdef CONFIG_X86_32
+asmlinkage int sys_sigaltstack(unsigned long bx)
+{
+	/*
+	 * This is needed to make gcc realize it doesn't own the
+	 * "struct pt_regs"
+	 */
+	struct pt_regs *regs = (struct pt_regs *)&bx;
+	const stack_t __user *uss = (const stack_t __user *)bx;
+	stack_t __user *uoss = (stack_t __user *)regs->cx;
+
+	return do_sigaltstack(uss, uoss, regs->sp);
+}
+#else /* !CONFIG_X86_32 */
 asmlinkage long
 sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 		struct pt_regs *regs)
 {
 	return do_sigaltstack(uss, uoss, regs->sp);
 }
+#endif /* CONFIG_X86_32 */
 
 #define COPY(x)			{		\
 	err |= __get_user(regs->x, &sc->x);	\
-- 
cgit v1.2.3-70-g09d2


From 2456d738ef051f85170bf018faef63f83fa84eb5 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Fri, 21 Nov 2008 17:38:57 -0800
Subject: x86: signal: cosmetic unification of sys_rt_sigreturn()

Impact: cleanup

Add #ifdef directive for unification.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 7 +++++++
 arch/x86/kernel/signal_64.c | 9 +++++++++
 2 files changed, 16 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index d9909881ac66..f7dd6c44c042 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -299,12 +299,19 @@ badframe:
 	return 0;
 }
 
+#ifdef CONFIG_X86_32
 asmlinkage int sys_rt_sigreturn(unsigned long __unused)
 {
 	struct pt_regs *regs = (struct pt_regs *)&__unused;
 
 	return do_rt_sigreturn(regs);
 }
+#else /* !CONFIG_X86_32 */
+asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+{
+	return do_rt_sigreturn(regs);
+}
+#endif /* CONFIG_X86_32 */
 
 /*
  * Set up a signal frame.
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index b6e4fe03a36b..32718f5e4f61 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -169,10 +169,19 @@ badframe:
 	return 0;
 }
 
+#ifdef CONFIG_X86_32
+asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+{
+	struct pt_regs *regs = (struct pt_regs *)&__unused;
+
+	return do_rt_sigreturn(regs);
+}
+#else /* !CONFIG_X86_32 */
 asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
 {
 	return do_rt_sigreturn(regs);
 }
+#endif /* CONFIG_X86_32 */
 
 /*
  * Set up a signal frame.
-- 
cgit v1.2.3-70-g09d2


From c450d7805b2c5cac8846c5f490fddfd9030d2207 Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Fri, 21 Nov 2008 23:17:09 +0100
Subject: x86: vmware - fix sparse warnings

Impact: fix sparse build warning

Fix the following sparse warnings:

arch/x86/kernel/cpu/vmware.c:69:5: warning: symbol 'vmware_platform'
was not declared. Should it be static?
arch/x86/kernel/cpu/vmware.c:89:15: warning: symbol
'vmware_get_tsc_khz' was not declared. Should it be static?
arch/x86/kernel/cpu/vmware.c:107:16: warning: symbol
'vmware_set_feature_bits' was not declared. Should it be static?

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Cc: "Alok N Kataria" <akataria@vmware.com>
Cc: "Dan Hecht" <dhecht@vmware.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/vmware.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index c034bda842d9..284c399e3234 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -23,6 +23,7 @@
 
 #include <linux/dmi.h>
 #include <asm/div64.h>
+#include <asm/vmware.h>
 
 #define CPUID_VMWARE_INFO_LEAF	0x40000000
 #define VMWARE_HYPERVISOR_MAGIC	0x564D5868
-- 
cgit v1.2.3-70-g09d2


From 4e42ebd57b2e727b28bf5f6068e95cd19b0e807b Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Fri, 21 Nov 2008 22:56:17 +0100
Subject: x86: hypervisor - fix sparse warnings

Impact: fix sparse build warning

Fix the following sparse warnings:

  arch/x86/kernel/cpu/hypervisor.c:37:15: warning: symbol
  'get_hypervisor_tsc_freq' was not declared. Should it be static?
  arch/x86/kernel/cpu/hypervisor.c:53:16: warning: symbol
  'init_hypervisor' was not declared. Should it be static?

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Cc: "Alok N Kataria" <akataria@vmware.com>
Cc: "Dan Hecht" <dhecht@vmware.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/hypervisor.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 35ae2b75226d..fb5b86af0b01 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -23,6 +23,7 @@
 
 #include <asm/processor.h>
 #include <asm/vmware.h>
+#include <asm/hypervisor.h>
 
 static inline void __cpuinit
 detect_hypervisor_vendor(struct cpuinfo_x86 *c)
-- 
cgit v1.2.3-70-g09d2


From 8a2503fa4a6fae8ee42140b339f37373fc6acaae Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sun, 23 Nov 2008 14:53:43 +0300
Subject: x86: move dwarf2 related macro to dwarf2.h

Impact: cleanup

Move recently introduced dwarf2 macros to dwarf2.h file.
It allow us to not duplicate them in assembly files.

Active usage of _cfi macros don't make assembly files
more obvious to understand but we already have a lot of
macros there which requires to search the definitions
of them *anyway*. But at least it make every cfi usage
one line shorter.

Also some code alignment is done.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/dwarf2.h | 97 +++++++++++++++++++++++++++++--------------
 arch/x86/kernel/entry_64.S    | 23 ----------
 2 files changed, 66 insertions(+), 54 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index 804b6e6be929..3afc5e87cfdd 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -6,56 +6,91 @@
 #endif
 
 /*
-   Macros for dwarf2 CFI unwind table entries.
-   See "as.info" for details on these pseudo ops. Unfortunately
-   they are only supported in very new binutils, so define them
-   away for older version.
+ * Macros for dwarf2 CFI unwind table entries.
+ * See "as.info" for details on these pseudo ops. Unfortunately
+ * they are only supported in very new binutils, so define them
+ * away for older version.
  */
 
 #ifdef CONFIG_AS_CFI
 
-#define CFI_STARTPROC .cfi_startproc
-#define CFI_ENDPROC .cfi_endproc
-#define CFI_DEF_CFA .cfi_def_cfa
-#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
-#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
-#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
-#define CFI_OFFSET .cfi_offset
-#define CFI_REL_OFFSET .cfi_rel_offset
-#define CFI_REGISTER .cfi_register
-#define CFI_RESTORE .cfi_restore
-#define CFI_REMEMBER_STATE .cfi_remember_state
-#define CFI_RESTORE_STATE .cfi_restore_state
-#define CFI_UNDEFINED .cfi_undefined
+#define CFI_STARTPROC		.cfi_startproc
+#define CFI_ENDPROC		.cfi_endproc
+#define CFI_DEF_CFA		.cfi_def_cfa
+#define CFI_DEF_CFA_REGISTER	.cfi_def_cfa_register
+#define CFI_DEF_CFA_OFFSET	.cfi_def_cfa_offset
+#define CFI_ADJUST_CFA_OFFSET	.cfi_adjust_cfa_offset
+#define CFI_OFFSET		.cfi_offset
+#define CFI_REL_OFFSET		.cfi_rel_offset
+#define CFI_REGISTER		.cfi_register
+#define CFI_RESTORE		.cfi_restore
+#define CFI_REMEMBER_STATE	.cfi_remember_state
+#define CFI_RESTORE_STATE	.cfi_restore_state
+#define CFI_UNDEFINED		.cfi_undefined
 
 #ifdef CONFIG_AS_CFI_SIGNAL_FRAME
-#define CFI_SIGNAL_FRAME .cfi_signal_frame
+#define CFI_SIGNAL_FRAME	.cfi_signal_frame
 #else
 #define CFI_SIGNAL_FRAME
 #endif
 
 #else
 
-/* Due to the structure of pre-exisiting code, don't use assembler line
-   comment character # to ignore the arguments. Instead, use a dummy macro. */
+/*
+ * Due to the structure of pre-exisiting code, don't use assembler line
+ * comment character # to ignore the arguments. Instead, use a dummy macro.
+ */
 .macro cfi_ignore a=0, b=0, c=0, d=0
 .endm
 
-#define CFI_STARTPROC	cfi_ignore
-#define CFI_ENDPROC	cfi_ignore
-#define CFI_DEF_CFA	cfi_ignore
+#define CFI_STARTPROC		cfi_ignore
+#define CFI_ENDPROC		cfi_ignore
+#define CFI_DEF_CFA		cfi_ignore
 #define CFI_DEF_CFA_REGISTER	cfi_ignore
 #define CFI_DEF_CFA_OFFSET	cfi_ignore
 #define CFI_ADJUST_CFA_OFFSET	cfi_ignore
-#define CFI_OFFSET	cfi_ignore
-#define CFI_REL_OFFSET	cfi_ignore
-#define CFI_REGISTER	cfi_ignore
-#define CFI_RESTORE	cfi_ignore
-#define CFI_REMEMBER_STATE cfi_ignore
-#define CFI_RESTORE_STATE cfi_ignore
-#define CFI_UNDEFINED cfi_ignore
-#define CFI_SIGNAL_FRAME cfi_ignore
+#define CFI_OFFSET		cfi_ignore
+#define CFI_REL_OFFSET		cfi_ignore
+#define CFI_REGISTER		cfi_ignore
+#define CFI_RESTORE		cfi_ignore
+#define CFI_REMEMBER_STATE	cfi_ignore
+#define CFI_RESTORE_STATE	cfi_ignore
+#define CFI_UNDEFINED		cfi_ignore
+#define CFI_SIGNAL_FRAME	cfi_ignore
 
 #endif
 
+/*
+ * An attempt to make CFI annotations more or less
+ * correct and shorter. It is implied that you know
+ * what you're doing if you use them.
+ */
+#ifdef __ASSEMBLY__
+#ifdef CONFIG_X86_64
+	.macro pushq_cfi reg
+	pushq \reg
+	CFI_ADJUST_CFA_OFFSET 8
+	.endm
+
+	.macro popq_cfi reg
+	popq \reg
+	CFI_ADJUST_CFA_OFFSET -8
+	.endm
+
+	.macro movq_cfi reg offset=0
+	movq %\reg, \offset(%rsp)
+	CFI_REL_OFFSET \reg, \offset
+	.endm
+
+	.macro movq_cfi_restore offset reg
+	movq \offset(%rsp), %\reg
+	CFI_RESTORE \reg
+	.endm
+#else /*!CONFIG_X86_64*/
+
+	/* 32bit defenitions are missed yet */
+
+#endif /*!CONFIG_X86_64*/
+#endif /*__ASSEMBLY__*/
+
 #endif /* _ASM_X86_DWARF2_H */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e5ddf573ded2..249eb604e71b 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -60,29 +60,6 @@
 #define __AUDIT_ARCH_LE	   0x40000000
 
 	.code64
-/*
- * Some macro's to hide the most frequently occuring CFI annotations.
- */
-	.macro pushq_cfi reg
-	pushq \reg
-	CFI_ADJUST_CFA_OFFSET 8
-	.endm
-
-	.macro popq_cfi reg
-	popq \reg
-	CFI_ADJUST_CFA_OFFSET -8
-	.endm
-
-	.macro movq_cfi reg offset=0
-	movq %\reg, \offset(%rsp)
-	CFI_REL_OFFSET \reg, \offset
-	.endm
-
-	.macro movq_cfi_restore offset reg
-	movq \offset(%rsp), %\reg
-	CFI_RESTORE \reg
-	.endm
-
 #ifdef CONFIG_FUNCTION_TRACER
 #ifdef CONFIG_DYNAMIC_FTRACE
 ENTRY(mcount)
-- 
cgit v1.2.3-70-g09d2


From 050dc6944b9ca2186f4729ab44e0da3743933941 Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Sun, 23 Nov 2008 13:35:48 +0100
Subject: x86: remove duplicate #define from 'cpufeature.h'

Impact: cleanup

Remove duplicate #define from 'cpufeature.h'.

This also fixes the following sparse warning:

 arch/x86/kernel/cpu/capflags.c:54:3: warning: Initializer entry defined twice
 arch/x86/kernel/cpu/capflags.c:58:3:   also defined here

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/cpufeature.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 694d1f8f1bee..5bce8ed02b44 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -80,7 +80,6 @@
 #define X86_FEATURE_UP		(3*32+ 9) /* smp kernel running on up */
 #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */
 #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_NOPL	(3*32+20) /* The NOPL (0F 1F) instructions */
 #define X86_FEATURE_PEBS	(3*32+12) /* Precise-Event Based Sampling */
 #define X86_FEATURE_BTS		(3*32+13) /* Branch Trace Store */
 #define X86_FEATURE_SYSCALL32	(3*32+14) /* "" syscall in ia32 userspace */
-- 
cgit v1.2.3-70-g09d2


From 322648d1ba75280d62f114d47048beb0b35f5047 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Sun, 23 Nov 2008 10:08:28 +0100
Subject: x86: include ENTRY/END in entry handlers in entry_64.S

Impact: cleanup of entry_64.S

Except for the order and the place of the functions, this
patch should not change the generated code.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 259 +++++++++++++++++++--------------------------
 1 file changed, 109 insertions(+), 150 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 249eb604e71b..1a856c0b21a8 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -922,76 +922,70 @@ END(common_interrupt)
 /*
  * APIC interrupts.
  */
-	.p2align 5
-
-	.macro apicinterrupt num,func
+.macro apicinterrupt num sym do_sym
+ENTRY(\sym)
 	INTR_FRAME
 	pushq $~(\num)
 	CFI_ADJUST_CFA_OFFSET 8
-	interrupt \func
+	interrupt \do_sym
 	jmp ret_from_intr
 	CFI_ENDPROC
-	.endm
+END(\sym)
+.endm
 
-ENTRY(thermal_interrupt)
-	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
-END(thermal_interrupt)
+#ifdef CONFIG_SMP
+apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
+	irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
+#endif
 
-ENTRY(threshold_interrupt)
-	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
-END(threshold_interrupt)
+apicinterrupt 220 \
+	uv_bau_message_intr1 uv_bau_message_interrupt
+apicinterrupt LOCAL_TIMER_VECTOR \
+	apic_timer_interrupt smp_apic_timer_interrupt
 
 #ifdef CONFIG_SMP
-ENTRY(reschedule_interrupt)
-	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
-END(reschedule_interrupt)
-
-	.macro INVALIDATE_ENTRY num
-ENTRY(invalidate_interrupt\num)
-	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
-END(invalidate_interrupt\num)
-	.endm
-
-	INVALIDATE_ENTRY 0
-	INVALIDATE_ENTRY 1
-	INVALIDATE_ENTRY 2
-	INVALIDATE_ENTRY 3
-	INVALIDATE_ENTRY 4
-	INVALIDATE_ENTRY 5
-	INVALIDATE_ENTRY 6
-	INVALIDATE_ENTRY 7
-
-ENTRY(call_function_interrupt)
-	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
-END(call_function_interrupt)
-ENTRY(call_function_single_interrupt)
-	apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
-END(call_function_single_interrupt)
-ENTRY(irq_move_cleanup_interrupt)
-	apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
-END(irq_move_cleanup_interrupt)
+apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
+	invalidate_interrupt0 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \
+	invalidate_interrupt1 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
+	invalidate_interrupt2 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
+	invalidate_interrupt3 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
+	invalidate_interrupt4 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
+	invalidate_interrupt5 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
+	invalidate_interrupt6 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
+	invalidate_interrupt7 smp_invalidate_interrupt
 #endif
 
-ENTRY(apic_timer_interrupt)
-	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
-END(apic_timer_interrupt)
-
-ENTRY(uv_bau_message_intr1)
-	apicinterrupt 220,uv_bau_message_interrupt
-END(uv_bau_message_intr1)
+apicinterrupt THRESHOLD_APIC_VECTOR \
+	threshold_interrupt mce_threshold_interrupt
+apicinterrupt THERMAL_APIC_VECTOR \
+	thermal_interrupt smp_thermal_interrupt
 
-ENTRY(error_interrupt)
-	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
-END(error_interrupt)
+#ifdef CONFIG_SMP
+apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
+	call_function_single_interrupt smp_call_function_single_interrupt
+apicinterrupt CALL_FUNCTION_VECTOR \
+	call_function_interrupt smp_call_function_interrupt
+apicinterrupt RESCHEDULE_VECTOR \
+	reschedule_interrupt smp_reschedule_interrupt
+#endif
 
-ENTRY(spurious_interrupt)
-	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
-END(spurious_interrupt)
+apicinterrupt ERROR_APIC_VECTOR \
+	error_interrupt smp_error_interrupt
+apicinterrupt SPURIOUS_APIC_VECTOR \
+	spurious_interrupt smp_spurious_interrupt
 
 /*
  * Exception entry points.
  */
-	.macro zeroentry sym
+.macro zeroentry sym do_sym
+ENTRY(\sym)
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
@@ -1001,12 +995,14 @@ END(spurious_interrupt)
 	DEFAULT_FRAME 0
 	movq %rsp,%rdi		/* pt_regs pointer */
 	xorl %esi,%esi		/* no error code */
-	call \sym
+	call \do_sym
 	jmp error_exit		/* %ebx: no swapgs flag */
 	CFI_ENDPROC
-	.endm
+END(\sym)
+.endm
 
-	.macro paranoidzeroentry sym
+.macro paranoidzeroentry sym do_sym
+KPROBE_ENTRY(\sym)
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	pushq $-1		/* ORIG_RAX: no syscall to restart */
@@ -1016,12 +1012,14 @@ END(spurious_interrupt)
 	TRACE_IRQS_OFF
 	movq %rsp,%rdi		/* pt_regs pointer */
 	xorl %esi,%esi		/* no error code */
-	call \sym
+	call \do_sym
 	jmp paranoid_exit	/* %ebx: no swapgs flag */
 	CFI_ENDPROC
-	.endm
+KPROBE_END(\sym)
+.endm
 
-	.macro paranoidzeroentry_ist sym ist
+.macro paranoidzeroentry_ist sym do_sym ist
+KPROBE_ENTRY(\sym)
  	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	pushq $-1		/* ORIG_RAX: no syscall to restart */
@@ -1033,13 +1031,19 @@ END(spurious_interrupt)
 	xorl %esi,%esi		/* no error code */
 	movq %gs:pda_data_offset, %rbp
 	subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
-	call \sym
+	call \do_sym
 	addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
 	jmp paranoid_exit	/* %ebx: no swapgs flag */
 	CFI_ENDPROC
-	.endm
+KPROBE_END(\sym)
+.endm
 
-	.macro errorentry sym
+.macro errorentry sym do_sym entry=0
+.if \entry
+KPROBE_ENTRY(\sym)
+.else
+ENTRY(\sym)
+.endif
 	XCPT_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	subq $15*8,%rsp
@@ -1049,13 +1053,23 @@ END(spurious_interrupt)
 	movq %rsp,%rdi			/* pt_regs pointer */
 	movq ORIG_RAX(%rsp),%rsi	/* get error code */
 	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
-	call \sym
+	call \do_sym
 	jmp error_exit			/* %ebx: no swapgs flag */
 	CFI_ENDPROC
-	.endm
+.if \entry
+KPROBE_END(\sym)
+.else
+END(\sym)
+.endif
+.endm
 
 	/* error code is on the stack already */
-	.macro paranoiderrorentry sym
+.macro paranoiderrorentry sym do_sym entry=1
+.if \entry
+KPROBE_ENTRY(\sym)
+.else
+ENTRY(\sym)
+.endif
 	XCPT_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	subq $15*8,%rsp
@@ -1066,10 +1080,37 @@ END(spurious_interrupt)
 	movq %rsp,%rdi			/* pt_regs pointer */
 	movq ORIG_RAX(%rsp),%rsi	/* get error code */
 	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
-	call \sym
+	call \do_sym
 	jmp paranoid_exit		/* %ebx: no swapgs flag */
 	CFI_ENDPROC
-	.endm
+.if \entry
+KPROBE_END(\sym)
+.else
+END(\sym)
+.endif
+.endm
+
+zeroentry divide_error do_divide_error
+paranoidzeroentry_ist debug do_debug DEBUG_STACK
+paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
+zeroentry overflow do_overflow
+zeroentry bounds do_bounds
+zeroentry invalid_op do_invalid_op
+zeroentry device_not_available do_device_not_available
+paranoiderrorentry double_fault do_double_fault 0
+zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
+errorentry invalid_TSS do_invalid_TSS
+errorentry segment_not_present do_segment_not_present
+paranoiderrorentry stack_segment do_stack_segment
+errorentry general_protection do_general_protection 1
+errorentry page_fault do_page_fault 1
+zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
+zeroentry coprocessor_error do_coprocessor_error
+errorentry alignment_check do_alignment_check
+#ifdef CONFIG_X86_MCE
+paranoidzeroentry machine_check do_machine_check
+#endif
+zeroentry simd_coprocessor_error do_simd_coprocessor_error
 
 	/*
  	 * "Paranoid" exit path from exception stack.
@@ -1321,26 +1362,7 @@ ENTRY(kernel_execve)
 	CFI_ENDPROC
 ENDPROC(kernel_execve)
 
-KPROBE_ENTRY(page_fault)
-	errorentry do_page_fault
-KPROBE_END(page_fault)
 
-ENTRY(coprocessor_error)
-	zeroentry do_coprocessor_error
-END(coprocessor_error)
-
-ENTRY(simd_coprocessor_error)
-	zeroentry do_simd_coprocessor_error
-END(simd_coprocessor_error)
-
-ENTRY(device_not_available)
-	zeroentry do_device_not_available
-END(device_not_available)
-
-	/* runs on exception stack */
-KPROBE_ENTRY(debug)
-	paranoidzeroentry_ist do_debug, DEBUG_STACK
-KPROBE_END(debug)
 
 	/* runs on exception stack */
 KPROBE_ENTRY(nmi)
@@ -1397,67 +1419,6 @@ nmi_schedule:
 #endif
 KPROBE_END(nmi)
 
-KPROBE_ENTRY(int3)
-	paranoidzeroentry_ist do_int3, DEBUG_STACK
-KPROBE_END(int3)
-
-ENTRY(overflow)
-	zeroentry do_overflow
-END(overflow)
-
-ENTRY(bounds)
-	zeroentry do_bounds
-END(bounds)
-
-ENTRY(invalid_op)
-	zeroentry do_invalid_op
-END(invalid_op)
-
-ENTRY(coprocessor_segment_overrun)
-	zeroentry do_coprocessor_segment_overrun
-END(coprocessor_segment_overrun)
-
-	/* runs on exception stack */
-ENTRY(double_fault)
-	paranoiderrorentry do_double_fault
-END(double_fault)
-
-ENTRY(invalid_TSS)
-	errorentry do_invalid_TSS
-END(invalid_TSS)
-
-ENTRY(segment_not_present)
-	errorentry do_segment_not_present
-END(segment_not_present)
-
-	/* runs on exception stack */
-ENTRY(stack_segment)
-	paranoiderrorentry do_stack_segment
-END(stack_segment)
-
-KPROBE_ENTRY(general_protection)
-	errorentry do_general_protection
-KPROBE_END(general_protection)
-
-ENTRY(alignment_check)
-	errorentry do_alignment_check
-END(alignment_check)
-
-ENTRY(divide_error)
-	zeroentry do_divide_error
-END(divide_error)
-
-ENTRY(spurious_interrupt_bug)
-	zeroentry do_spurious_interrupt_bug
-END(spurious_interrupt_bug)
-
-#ifdef CONFIG_X86_MCE
-	/* runs on exception stack */
-ENTRY(machine_check)
-	paranoidzeroentry do_machine_check
-END(machine_check)
-#endif
-
 /* Call softirq on interrupt stack. Interrupts are off. */
 ENTRY(call_softirq)
 	CFI_STARTPROC
@@ -1486,9 +1447,7 @@ KPROBE_ENTRY(ignore_sysret)
 ENDPROC(ignore_sysret)
 
 #ifdef CONFIG_XEN
-ENTRY(xen_hypervisor_callback)
-	zeroentry xen_do_hypervisor_callback
-END(xen_hypervisor_callback)
+zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
 
 /*
 # A note on the "critical region" in our callback handler.
-- 
cgit v1.2.3-70-g09d2


From 6efdcfaf16cc4fc76651603e083cf3ec4bd1e6de Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Sun, 23 Nov 2008 10:15:32 +0100
Subject: x86: KPROBE_ENTRY should be paired wth KPROBE_END

Impact: move some code out of .kprobes.text

KPROBE_ENTRY switches code generation to .kprobes.text, and KPROBE_END
uses .popsection to get back to the previous section (.text, normally).
Also replace ENDPROC by END, for consistency.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1a856c0b21a8..f2d546e16354 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1167,7 +1167,7 @@ paranoid_schedule:
 	TRACE_IRQS_OFF
 	jmp paranoid_userspace
 	CFI_ENDPROC
-END(paranoid_exit)
+KPROBE_END(paranoid_exit)
 
 /*
  * Exception entry point. This expects an error code/orig_rax on the stack.
@@ -1259,7 +1259,7 @@ gs_change:
 	CFI_ADJUST_CFA_OFFSET -8
         ret
 	CFI_ENDPROC
-ENDPROC(native_load_gs_index)
+END(native_load_gs_index)
 
         .section __ex_table,"a"
         .align 8
@@ -1313,7 +1313,7 @@ ENTRY(kernel_thread)
 	UNFAKE_STACK_FRAME
 	ret
 	CFI_ENDPROC
-ENDPROC(kernel_thread)
+END(kernel_thread)
 
 child_rip:
 	pushq $0		# fake return address
@@ -1329,7 +1329,7 @@ child_rip:
 	mov %eax, %edi
 	call do_exit
 	CFI_ENDPROC
-ENDPROC(child_rip)
+END(child_rip)
 
 /*
  * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
@@ -1360,9 +1360,7 @@ ENTRY(kernel_execve)
 	UNFAKE_STACK_FRAME
 	ret
 	CFI_ENDPROC
-ENDPROC(kernel_execve)
-
-
+END(kernel_execve)
 
 	/* runs on exception stack */
 KPROBE_ENTRY(nmi)
@@ -1437,14 +1435,14 @@ ENTRY(call_softirq)
 	decl %gs:pda_irqcount
 	ret
 	CFI_ENDPROC
-ENDPROC(call_softirq)
+END(call_softirq)
 
 KPROBE_ENTRY(ignore_sysret)
 	CFI_STARTPROC
 	mov $-ENOSYS,%eax
 	sysret
 	CFI_ENDPROC
-ENDPROC(ignore_sysret)
+KPROBE_END(ignore_sysret)
 
 #ifdef CONFIG_XEN
 zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
-- 
cgit v1.2.3-70-g09d2


From 3b6c52b5b634ae41d762cb174465272d69198160 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sun, 23 Nov 2008 20:21:39 +0300
Subject: x86: introduce ENTRY(KPROBE_ENTRY)_X86 assembly helpers to catch
 unbalanced declaration v3

Impact: make ENTRY()/END() macros more capable

It's usefull to catch unbalanced or messed or mixed declarations of ENTRY and
KPROBES. These macros would help a bit.

For example the following code would compile without problems

        ENTRY_X86(mcount)
                retq
        END_X86(mcount)

But if you forget and mess the following form

        ENTRY_X86(mcount)
                retq
        END(mcount)

        ENTRY_X86(ftrace_caller)

The assembler will issue the following message:
Error: ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed

Actually the checking is performed at every _X86 macro
so maybe it's good idea to put ENTRY_KPROBE_FINAL_X86
at the end of .S file to be sure you didn't miss anything.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Alexander van Heukelum <heukelum@mailshack.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/linkage.h | 60 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index f61ee8f937e4..5d98d0b68ffc 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -57,5 +57,65 @@
 #define __ALIGN_STR ".align 16,0x90"
 #endif
 
+/*
+ * to check ENTRY_X86/END_X86 and
+ * KPROBE_ENTRY_X86/KPROBE_END_X86
+ * unbalanced-missed-mixed appearance
+ */
+#define __set_entry_x86		.set ENTRY_X86_IN, 0
+#define __unset_entry_x86	.set ENTRY_X86_IN, 1
+#define __set_kprobe_x86	.set KPROBE_X86_IN, 0
+#define __unset_kprobe_x86	.set KPROBE_X86_IN, 1
+
+#define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed"
+
+#define __check_entry_x86	\
+	.ifdef ENTRY_X86_IN;	\
+	.ifeq ENTRY_X86_IN;	\
+	__macro_err_x86;	\
+	.abort;			\
+	.endif;			\
+	.endif
+
+#define __check_kprobe_x86	\
+	.ifdef KPROBE_X86_IN;	\
+	.ifeq KPROBE_X86_IN;	\
+	__macro_err_x86;	\
+	.abort;			\
+	.endif;			\
+	.endif
+
+#define __check_entry_kprobe_x86	\
+	__check_entry_x86;		\
+	__check_kprobe_x86
+
+#define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86
+
+#define ENTRY_X86(name)			\
+	__check_entry_kprobe_x86;	\
+	__set_entry_x86;		\
+	.globl name;			\
+	__ALIGN;			\
+	name:
+
+#define END_X86(name)			\
+	__unset_entry_x86;		\
+	__check_entry_kprobe_x86;	\
+	.size name, .-name
+
+#define KPROBE_ENTRY_X86(name)		\
+	__check_entry_kprobe_x86;	\
+	__set_kprobe_x86;		\
+	.pushsection .kprobes.text, "ax"; \
+	.globl name;			\
+	__ALIGN;			\
+	name:
+
+#define KPROBE_END_X86(name)		\
+	__unset_kprobe_x86;		\
+	__check_entry_kprobe_x86;	\
+	.size name, .-name;		\
+	.popsection
+
 #endif /* _ASM_X86_LINKAGE_H */
 
-- 
cgit v1.2.3-70-g09d2


From a1a00b58855ccdbedf556b4f5638d5208b454472 Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Sun, 23 Nov 2008 19:37:09 +0100
Subject: x86: boot - fix sparse warnings

Impact: make global variables static

Fix these sparse warnings:

 arch/x86/boot/video.c:233:3: warning: symbol 'saved' was not declared. Should it be static?
 arch/x86/boot/video-vga.c:37:13: warning: symbol 'video_vga' was not declared. Should it be static?

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/boot/video-vga.c | 4 ++--
 arch/x86/boot/video.c     | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index b939cb476dec..5d4742ed4aa2 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -34,7 +34,7 @@ static struct mode_info cga_modes[] = {
 	{ VIDEO_80x25,  80, 25, 0 },
 };
 
-__videocard video_vga;
+static __videocard video_vga;
 
 /* Set basic 80x25 mode */
 static u8 vga_set_basic_mode(void)
@@ -259,7 +259,7 @@ static int vga_probe(void)
 	return mode_count[adapter];
 }
 
-__videocard video_vga = {
+static __videocard video_vga = {
 	.card_name	= "VGA",
 	.probe		= vga_probe,
 	.set_mode	= vga_set_mode,
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index 83598b23093a..3bef2c1febe9 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -226,7 +226,7 @@ static unsigned int mode_menu(void)
 
 #ifdef CONFIG_VIDEO_RETAIN
 /* Save screen content to the heap */
-struct saved_screen {
+static struct saved_screen {
 	int x, y;
 	int curx, cury;
 	u16 *data;
-- 
cgit v1.2.3-70-g09d2


From 5f5db591326779a80cfe490c5d6b6ce9fac08b31 Mon Sep 17 00:00:00 2001
From: jia zhang <jia.zhang2008@gmail.com>
Date: Sun, 23 Nov 2008 22:47:10 +0800
Subject: x86, debug: remove the confusing entry in call trace

Impact: improve backtrace quality

avoid the confusion in call trace because of the lack of padding at the
tail of function.

When do_exit gets called, the return address behind call instruction is
pushed into stack. If something get wrong in do_exit, for x86_64, the
entry "kernel_execve +0x00/0xXX" rather than "child_rip +0xYY/0xZZ" is
in the call trace.

That looks confusing, so add a u2d to make the return address still part
of the original call site. (This also catches any instances of us returning
from that function somehow.)

Signed-off-by: jia zhang <jia.zhang2008@gmail.com>
Acked-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_32.S | 1 +
 arch/x86/kernel/entry_64.S | 1 +
 2 files changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 28b597ef9ca1..f6402c4ba10d 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1051,6 +1051,7 @@ ENTRY(kernel_thread_helper)
 	push %eax
 	CFI_ADJUST_CFA_OFFSET 4
 	call do_exit
+	ud2			# padding for call trace
 	CFI_ENDPROC
 ENDPROC(kernel_thread_helper)
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index ddeeb1052583..4a16bf31c783 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1172,6 +1172,7 @@ child_rip:
 	# exit
 	mov %eax, %edi
 	call do_exit
+	ud2			# padding for call trace
 	CFI_ENDPROC
 ENDPROC(child_rip)
 
-- 
cgit v1.2.3-70-g09d2


From 3b71e9e307b3406aa29960a7428247f8a48b810c Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Sun, 23 Nov 2008 20:19:33 +0100
Subject: x86: HPET: fix sparse warning

Impact: make global variable static

Fix this sparse warning:

 arch/x86/kernel/hpet.c:36:18: warning: symbol 'hpet_num_timers' was
 not declared. Should it be static?

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 067d8de913f6..15fcaacc1f84 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -33,7 +33,7 @@
  * HPET address is set in acpi/boot.c, when an ACPI entry exists
  */
 unsigned long				hpet_address;
-unsigned long				hpet_num_timers;
+static unsigned long			hpet_num_timers;
 static void __iomem			*hpet_virt_address;
 
 struct hpet_dev {
-- 
cgit v1.2.3-70-g09d2


From b47b92884212008b4bd044ba6b48b93c00b10ec6 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 24 Nov 2008 00:50:09 -0800
Subject: x86: drop REBOOT_CF9_COND from reboot fallback chain

Impact: Reverts sequence of reboot fallbacks

Checkin 14d7ca5c575853664d8fe4f225a77b8df1b7de7d changed the default
reboot method to "pci", a.k.a. port CF9.  Unfortunately this has been
shown to cause lockups on at least two systems for which REBOOT_KBD
worked, both Thinkpads with Intel chipsets.  Checkin
3889d0cea2b73049bdca062d9ff1e5d33468289c reverted the default, but did
not revert the fallback chain.  This checkin reverts the fallback
chain; port CF9 is now only done by explicit "reboot=pci" or a future
potential DMI key.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/reboot.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 790b09fbadcb..bb387ab0eea8 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -384,20 +384,20 @@ static void native_machine_emergency_restart(void)
 			load_idt(&no_idt);
 			__asm__ __volatile__("int3");
 
-			reboot_type = BOOT_CF9_COND;
+			reboot_type = BOOT_KBD;
 			break;
 
 #ifdef CONFIG_X86_32
 		case BOOT_BIOS:
 			machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
 
-			reboot_type = BOOT_CF9_COND;
+			reboot_type = BOOT_KBD;
 			break;
 #endif
 
 		case BOOT_ACPI:
 			acpi_reboot();
-			reboot_type = BOOT_CF9_COND;
+			reboot_type = BOOT_KBD;
 			break;
 
 		case BOOT_EFI:
@@ -406,7 +406,7 @@ static void native_machine_emergency_restart(void)
 						 EFI_RESET_WARM :
 						 EFI_RESET_COLD,
 						 EFI_SUCCESS, 0, NULL);
-			reboot_type = BOOT_CF9_COND;
+			reboot_type = BOOT_KBD;
 			break;
 
 		case BOOT_CF9:
-- 
cgit v1.2.3-70-g09d2


From ad07e914e681f18ec0eaba60db17f497ee7e7e78 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 24 Nov 2008 11:33:12 +0100
Subject: x86 defconfig: increase CONFIG_LOG_BUF_SHIFT

Impact: double the defconfig printk buffer

Booting defconfigs produces more output than 128K so the output is
truncated - double it to 256K.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/configs/i386_defconfig   | 2 +-
 arch/x86/configs/x86_64_defconfig | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 13b8c86ae985..71fc39c70782 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -77,7 +77,7 @@ CONFIG_AUDIT=y
 CONFIG_AUDITSYSCALL=y
 CONFIG_AUDIT_TREE=y
 # CONFIG_IKCONFIG is not set
-CONFIG_LOG_BUF_SHIFT=17
+CONFIG_LOG_BUF_SHIFT=18
 CONFIG_CGROUPS=y
 # CONFIG_CGROUP_DEBUG is not set
 CONFIG_CGROUP_NS=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index f0a03d7a7d63..b38bbabc1706 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -77,7 +77,7 @@ CONFIG_AUDIT=y
 CONFIG_AUDITSYSCALL=y
 CONFIG_AUDIT_TREE=y
 # CONFIG_IKCONFIG is not set
-CONFIG_LOG_BUF_SHIFT=17
+CONFIG_LOG_BUF_SHIFT=18
 CONFIG_CGROUPS=y
 # CONFIG_CGROUP_DEBUG is not set
 CONFIG_CGROUP_NS=y
-- 
cgit v1.2.3-70-g09d2


From e951e4af2e399c46891004d4931333d2d8d520ab Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 25 Nov 2008 08:42:01 +0100
Subject: x86: fix unused variable warning in arch/x86/kernel/hpet.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Impact: fix build warning

this warning:

  arch/x86/kernel/hpet.c:36: warning: ‘hpet_num_timers’ defined but not used

Triggers because hpet_num_timers is unused in the !CONFIG_PCI_MSI case.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 15fcaacc1f84..3f0a3edf0a57 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -33,7 +33,9 @@
  * HPET address is set in acpi/boot.c, when an ACPI entry exists
  */
 unsigned long				hpet_address;
+#ifdef CONFIG_PCI_MSI
 static unsigned long			hpet_num_timers;
+#endif
 static void __iomem			*hpet_virt_address;
 
 struct hpet_dev {
-- 
cgit v1.2.3-70-g09d2


From 2601657d223d82053d4e1fe1063091401e6b860a Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 24 Nov 2008 18:21:37 -0800
Subject: x86: signal: move {setup|restore}_sigcontext()

Impact: cleanup

Move {setup|restore}_sigcontext() declaration onto head of file.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 271 ++++++++++++++++++++++----------------------
 arch/x86/kernel/signal_64.c | 148 ++++++++++++------------
 2 files changed, 210 insertions(+), 209 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index f7dd6c44c042..b3f30d2a2178 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -70,6 +70,142 @@ static const struct {
 	0
 };
 
+#define COPY(x)			{		\
+	err |= __get_user(regs->x, &sc->x);	\
+}
+
+#define COPY_SEG(seg)		{			\
+		unsigned short tmp;			\
+		err |= __get_user(tmp, &sc->seg);	\
+		regs->seg = tmp;			\
+}
+
+#define COPY_SEG_CPL3(seg)	{			\
+		unsigned short tmp;			\
+		err |= __get_user(tmp, &sc->seg);	\
+		regs->seg = tmp | 3;			\
+}
+
+#define GET_SEG(seg)		{			\
+		unsigned short tmp;			\
+		err |= __get_user(tmp, &sc->seg);	\
+		loadsegment(seg, tmp);			\
+}
+
+static int
+restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
+		   unsigned long *pax)
+{
+	void __user *buf;
+	unsigned int tmpflags;
+	unsigned int err = 0;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+#ifdef CONFIG_X86_32
+	GET_SEG(gs);
+	COPY_SEG(fs);
+	COPY_SEG(es);
+	COPY_SEG(ds);
+#endif /* CONFIG_X86_32 */
+
+	COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
+	COPY(dx); COPY(cx); COPY(ip);
+
+#ifdef CONFIG_X86_64
+	COPY(r8);
+	COPY(r9);
+	COPY(r10);
+	COPY(r11);
+	COPY(r12);
+	COPY(r13);
+	COPY(r14);
+	COPY(r15);
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_X86_32
+	COPY_SEG_CPL3(cs);
+	COPY_SEG_CPL3(ss);
+#else /* !CONFIG_X86_32 */
+	/* Kernel saves and restores only the CS segment register on signals,
+	 * which is the bare minimum needed to allow mixed 32/64-bit code.
+	 * App's signal handler can save/restore other segments if needed. */
+	COPY_SEG_CPL3(cs);
+#endif /* CONFIG_X86_32 */
+
+	err |= __get_user(tmpflags, &sc->flags);
+	regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
+	regs->orig_ax = -1;		/* disable syscall checks */
+
+	err |= __get_user(buf, &sc->fpstate);
+	err |= restore_i387_xstate(buf);
+
+	err |= __get_user(*pax, &sc->ax);
+	return err;
+}
+
+static int
+setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
+		 struct pt_regs *regs, unsigned long mask)
+{
+	int err = 0;
+
+#ifdef CONFIG_X86_32
+	{
+		unsigned int tmp;
+
+		savesegment(gs, tmp);
+		err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
+	}
+	err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
+	err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
+	err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
+#endif /* CONFIG_X86_32 */
+
+	err |= __put_user(regs->di, &sc->di);
+	err |= __put_user(regs->si, &sc->si);
+	err |= __put_user(regs->bp, &sc->bp);
+	err |= __put_user(regs->sp, &sc->sp);
+	err |= __put_user(regs->bx, &sc->bx);
+	err |= __put_user(regs->dx, &sc->dx);
+	err |= __put_user(regs->cx, &sc->cx);
+	err |= __put_user(regs->ax, &sc->ax);
+#ifdef CONFIG_X86_64
+	err |= __put_user(regs->r8, &sc->r8);
+	err |= __put_user(regs->r9, &sc->r9);
+	err |= __put_user(regs->r10, &sc->r10);
+	err |= __put_user(regs->r11, &sc->r11);
+	err |= __put_user(regs->r12, &sc->r12);
+	err |= __put_user(regs->r13, &sc->r13);
+	err |= __put_user(regs->r14, &sc->r14);
+	err |= __put_user(regs->r15, &sc->r15);
+#endif /* CONFIG_X86_64 */
+
+	err |= __put_user(current->thread.trap_no, &sc->trapno);
+	err |= __put_user(current->thread.error_code, &sc->err);
+	err |= __put_user(regs->ip, &sc->ip);
+#ifdef CONFIG_X86_32
+	err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
+	err |= __put_user(regs->flags, &sc->flags);
+	err |= __put_user(regs->sp, &sc->sp_at_signal);
+	err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
+#else /* !CONFIG_X86_32 */
+	err |= __put_user(regs->flags, &sc->flags);
+	err |= __put_user(regs->cs, &sc->cs);
+	err |= __put_user(0, &sc->gs);
+	err |= __put_user(0, &sc->fs);
+#endif /* CONFIG_X86_32 */
+
+	err |= __put_user(fpstate, &sc->fpstate);
+
+	/* non-iBCS2 extensions.. */
+	err |= __put_user(mask, &sc->oldmask);
+	err |= __put_user(current->thread.cr2, &sc->cr2);
+
+	return err;
+}
+
 /*
  * Atomically swap in the new signal mask, and wait for a signal.
  */
@@ -147,84 +283,9 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 }
 #endif /* CONFIG_X86_32 */
 
-#define COPY(x)			{		\
-	err |= __get_user(regs->x, &sc->x);	\
-}
-
-#define COPY_SEG(seg)		{			\
-		unsigned short tmp;			\
-		err |= __get_user(tmp, &sc->seg);	\
-		regs->seg = tmp;			\
-}
-
-#define COPY_SEG_CPL3(seg)	{			\
-		unsigned short tmp;			\
-		err |= __get_user(tmp, &sc->seg);	\
-		regs->seg = tmp | 3;			\
-}
-
-#define GET_SEG(seg)		{			\
-		unsigned short tmp;			\
-		err |= __get_user(tmp, &sc->seg);	\
-		loadsegment(seg, tmp);			\
-}
-
 /*
  * Do a signal return; undo the signal stack.
  */
-static int
-restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
-		   unsigned long *pax)
-{
-	void __user *buf;
-	unsigned int tmpflags;
-	unsigned int err = 0;
-
-	/* Always make any pending restarted system calls return -EINTR */
-	current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
-#ifdef CONFIG_X86_32
-	GET_SEG(gs);
-	COPY_SEG(fs);
-	COPY_SEG(es);
-	COPY_SEG(ds);
-#endif /* CONFIG_X86_32 */
-
-	COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
-	COPY(dx); COPY(cx); COPY(ip);
-
-#ifdef CONFIG_X86_64
-	COPY(r8);
-	COPY(r9);
-	COPY(r10);
-	COPY(r11);
-	COPY(r12);
-	COPY(r13);
-	COPY(r14);
-	COPY(r15);
-#endif /* CONFIG_X86_64 */
-
-#ifdef CONFIG_X86_32
-	COPY_SEG_CPL3(cs);
-	COPY_SEG_CPL3(ss);
-#else /* !CONFIG_X86_32 */
-	/* Kernel saves and restores only the CS segment register on signals,
-	 * which is the bare minimum needed to allow mixed 32/64-bit code.
-	 * App's signal handler can save/restore other segments if needed. */
-	COPY_SEG_CPL3(cs);
-#endif /* CONFIG_X86_32 */
-
-	err |= __get_user(tmpflags, &sc->flags);
-	regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
-	regs->orig_ax = -1;		/* disable syscall checks */
-
-	err |= __get_user(buf, &sc->fpstate);
-	err |= restore_i387_xstate(buf);
-
-	err |= __get_user(*pax, &sc->ax);
-	return err;
-}
-
 asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
 {
 	struct sigframe __user *frame;
@@ -316,66 +377,6 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
 /*
  * Set up a signal frame.
  */
-static int
-setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
-		 struct pt_regs *regs, unsigned long mask)
-{
-	int err = 0;
-
-#ifdef CONFIG_X86_32
-	{
-		unsigned int tmp;
-
-		savesegment(gs, tmp);
-		err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
-	}
-	err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
-	err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
-	err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
-#endif /* CONFIG_X86_32 */
-
-	err |= __put_user(regs->di, &sc->di);
-	err |= __put_user(regs->si, &sc->si);
-	err |= __put_user(regs->bp, &sc->bp);
-	err |= __put_user(regs->sp, &sc->sp);
-	err |= __put_user(regs->bx, &sc->bx);
-	err |= __put_user(regs->dx, &sc->dx);
-	err |= __put_user(regs->cx, &sc->cx);
-	err |= __put_user(regs->ax, &sc->ax);
-#ifdef CONFIG_X86_64
-	err |= __put_user(regs->r8, &sc->r8);
-	err |= __put_user(regs->r9, &sc->r9);
-	err |= __put_user(regs->r10, &sc->r10);
-	err |= __put_user(regs->r11, &sc->r11);
-	err |= __put_user(regs->r12, &sc->r12);
-	err |= __put_user(regs->r13, &sc->r13);
-	err |= __put_user(regs->r14, &sc->r14);
-	err |= __put_user(regs->r15, &sc->r15);
-#endif /* CONFIG_X86_64 */
-
-	err |= __put_user(current->thread.trap_no, &sc->trapno);
-	err |= __put_user(current->thread.error_code, &sc->err);
-	err |= __put_user(regs->ip, &sc->ip);
-#ifdef CONFIG_X86_32
-	err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
-	err |= __put_user(regs->flags, &sc->flags);
-	err |= __put_user(regs->sp, &sc->sp_at_signal);
-	err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
-#else /* !CONFIG_X86_32 */
-	err |= __put_user(regs->flags, &sc->flags);
-	err |= __put_user(regs->cs, &sc->cs);
-	err |= __put_user(0, &sc->gs);
-	err |= __put_user(0, &sc->fs);
-#endif /* CONFIG_X86_32 */
-
-	err |= __put_user(fpstate, &sc->fpstate);
-
-	/* non-iBCS2 extensions.. */
-	err |= __put_user(mask, &sc->oldmask);
-	err |= __put_user(current->thread.cr2, &sc->cr2);
-
-	return err;
-}
 
 /*
  * Determine which stack to use..
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 32718f5e4f61..771c8fcc8b0d 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -50,28 +50,6 @@
 # define FIX_EFLAGS	__FIX_EFLAGS
 #endif
 
-#ifdef CONFIG_X86_32
-asmlinkage int sys_sigaltstack(unsigned long bx)
-{
-	/*
-	 * This is needed to make gcc realize it doesn't own the
-	 * "struct pt_regs"
-	 */
-	struct pt_regs *regs = (struct pt_regs *)&bx;
-	const stack_t __user *uss = (const stack_t __user *)bx;
-	stack_t __user *uoss = (stack_t __user *)regs->cx;
-
-	return do_sigaltstack(uss, uoss, regs->sp);
-}
-#else /* !CONFIG_X86_32 */
-asmlinkage long
-sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
-		struct pt_regs *regs)
-{
-	return do_sigaltstack(uss, uoss, regs->sp);
-}
-#endif /* CONFIG_X86_32 */
-
 #define COPY(x)			{		\
 	err |= __get_user(regs->x, &sc->x);	\
 }
@@ -82,9 +60,6 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 		regs->seg = tmp | 3;			\
 }
 
-/*
- * Do a signal return; undo the signal stack.
- */
 static int
 restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 		   unsigned long *pax)
@@ -138,54 +113,6 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	return err;
 }
 
-static long do_rt_sigreturn(struct pt_regs *regs)
-{
-	struct rt_sigframe __user *frame;
-	unsigned long ax;
-	sigset_t set;
-
-	frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
-		goto badframe;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
-		goto badframe;
-
-	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
-		goto badframe;
-
-	return ax;
-
-badframe:
-	signal_fault(regs, frame, "rt_sigreturn");
-	return 0;
-}
-
-#ifdef CONFIG_X86_32
-asmlinkage int sys_rt_sigreturn(unsigned long __unused)
-{
-	struct pt_regs *regs = (struct pt_regs *)&__unused;
-
-	return do_rt_sigreturn(regs);
-}
-#else /* !CONFIG_X86_32 */
-asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
-{
-	return do_rt_sigreturn(regs);
-}
-#endif /* CONFIG_X86_32 */
-
-/*
- * Set up a signal frame.
- */
 static int
 setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 		 struct pt_regs *regs, unsigned long mask)
@@ -247,10 +174,83 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 	return err;
 }
 
+#ifdef CONFIG_X86_32
+asmlinkage int sys_sigaltstack(unsigned long bx)
+{
+	/*
+	 * This is needed to make gcc realize it doesn't own the
+	 * "struct pt_regs"
+	 */
+	struct pt_regs *regs = (struct pt_regs *)&bx;
+	const stack_t __user *uss = (const stack_t __user *)bx;
+	stack_t __user *uoss = (stack_t __user *)regs->cx;
+
+	return do_sigaltstack(uss, uoss, regs->sp);
+}
+#else /* !CONFIG_X86_32 */
+asmlinkage long
+sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+		struct pt_regs *regs)
+{
+	return do_sigaltstack(uss, uoss, regs->sp);
+}
+#endif /* CONFIG_X86_32 */
+
 /*
- * Determine which stack to use..
+ * Do a signal return; undo the signal stack.
+ */
+static long do_rt_sigreturn(struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	unsigned long ax;
+	sigset_t set;
+
+	frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+		goto badframe;
+
+	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
+		goto badframe;
+
+	return ax;
+
+badframe:
+	signal_fault(regs, frame, "rt_sigreturn");
+	return 0;
+}
+
+#ifdef CONFIG_X86_32
+asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+{
+	struct pt_regs *regs = (struct pt_regs *)&__unused;
+
+	return do_rt_sigreturn(regs);
+}
+#else /* !CONFIG_X86_32 */
+asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+{
+	return do_rt_sigreturn(regs);
+}
+#endif /* CONFIG_X86_32 */
+
+/*
+ * Set up a signal frame.
  */
 
+/*
+ * Determine which stack to use..
+ */
 static void __user *
 get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size)
 {
-- 
cgit v1.2.3-70-g09d2


From bfeb91a9435889ef4fe7bfbb4b673f625e69e790 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 24 Nov 2008 18:23:12 -0800
Subject: x86: signal: cosmetic unification of __setup_sigframe() and
 __setup_rt_sigframe()

Impact: cleanup

Add #ifdef directive to unify __setup_sigframe() and __setup_rt_sigframe().
Move them after {setup|restore}_sigcontext() declaration.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 469 ++++++++++++++++++++++++++------------------
 arch/x86/kernel/signal_64.c | 309 ++++++++++++++++++++++++-----
 2 files changed, 536 insertions(+), 242 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index b3f30d2a2178..e9f71298e746 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -48,28 +48,6 @@
 # define FIX_EFLAGS	__FIX_EFLAGS
 #endif
 
-static const struct {
-	u16 poplmovl;
-	u32 val;
-	u16 int80;
-} __attribute__((packed)) retcode = {
-	0xb858,		/* popl %eax; movl $..., %eax */
-	__NR_sigreturn,
-	0x80cd,		/* int $0x80 */
-};
-
-static const struct {
-	u8  movl;
-	u32 val;
-	u16 int80;
-	u8  pad;
-} __attribute__((packed)) rt_retcode = {
-	0xb8,		/* movl $..., %eax */
-	__NR_rt_sigreturn,
-	0x80cd,		/* int $0x80 */
-	0
-};
-
 #define COPY(x)			{		\
 	err |= __get_user(regs->x, &sc->x);	\
 }
@@ -207,176 +185,30 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 }
 
 /*
- * Atomically swap in the new signal mask, and wait for a signal.
+ * Set up a signal frame.
  */
-asmlinkage int
-sys_sigsuspend(int history0, int history1, old_sigset_t mask)
-{
-	mask &= _BLOCKABLE;
-	spin_lock_irq(&current->sighand->siglock);
-	current->saved_sigmask = current->blocked;
-	siginitset(&current->blocked, mask);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	current->state = TASK_INTERRUPTIBLE;
-	schedule();
-	set_restore_sigmask();
-
-	return -ERESTARTNOHAND;
-}
-
-asmlinkage int
-sys_sigaction(int sig, const struct old_sigaction __user *act,
-	      struct old_sigaction __user *oact)
-{
-	struct k_sigaction new_ka, old_ka;
-	int ret;
-
-	if (act) {
-		old_sigset_t mask;
-
-		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
-		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
-		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
-			return -EFAULT;
-
-		__get_user(new_ka.sa.sa_flags, &act->sa_flags);
-		__get_user(mask, &act->sa_mask);
-		siginitset(&new_ka.sa.sa_mask, mask);
-	}
-
-	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
-	if (!ret && oact) {
-		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
-		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
-		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
-			return -EFAULT;
-
-		__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
-		__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
-	}
-
-	return ret;
-}
-
 #ifdef CONFIG_X86_32
-asmlinkage int sys_sigaltstack(unsigned long bx)
-{
-	/*
-	 * This is needed to make gcc realize it doesn't own the
-	 * "struct pt_regs"
-	 */
-	struct pt_regs *regs = (struct pt_regs *)&bx;
-	const stack_t __user *uss = (const stack_t __user *)bx;
-	stack_t __user *uoss = (stack_t __user *)regs->cx;
-
-	return do_sigaltstack(uss, uoss, regs->sp);
-}
-#else /* !CONFIG_X86_32 */
-asmlinkage long
-sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
-		struct pt_regs *regs)
-{
-	return do_sigaltstack(uss, uoss, regs->sp);
-}
-#endif /* CONFIG_X86_32 */
-
-/*
- * Do a signal return; undo the signal stack.
- */
-asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
-{
-	struct sigframe __user *frame;
-	struct pt_regs *regs;
-	unsigned long ax;
-	sigset_t set;
-
-	regs = (struct pt_regs *) &__unused;
-	frame = (struct sigframe __user *)(regs->sp - 8);
-
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-	if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1
-		&& __copy_from_user(&set.sig[1], &frame->extramask,
-				    sizeof(frame->extramask))))
-		goto badframe;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	if (restore_sigcontext(regs, &frame->sc, &ax))
-		goto badframe;
-	return ax;
-
-badframe:
-	if (show_unhandled_signals && printk_ratelimit()) {
-		printk("%s%s[%d] bad frame in sigreturn frame:"
-			"%p ip:%lx sp:%lx oeax:%lx",
-		    task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
-		    current->comm, task_pid_nr(current), frame, regs->ip,
-		    regs->sp, regs->orig_ax);
-		print_vma_addr(" in ", regs->ip);
-		printk(KERN_CONT "\n");
-	}
-
-	force_sig(SIGSEGV, current);
-
-	return 0;
-}
-
-static long do_rt_sigreturn(struct pt_regs *regs)
-{
-	struct rt_sigframe __user *frame;
-	unsigned long ax;
-	sigset_t set;
-
-	frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
-		goto badframe;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
-		goto badframe;
-
-	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
-		goto badframe;
-
-	return ax;
-
-badframe:
-	signal_fault(regs, frame, "rt_sigreturn");
-	return 0;
-}
-
-#ifdef CONFIG_X86_32
-asmlinkage int sys_rt_sigreturn(unsigned long __unused)
-{
-	struct pt_regs *regs = (struct pt_regs *)&__unused;
-
-	return do_rt_sigreturn(regs);
-}
-#else /* !CONFIG_X86_32 */
-asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
-{
-	return do_rt_sigreturn(regs);
-}
-#endif /* CONFIG_X86_32 */
+static const struct {
+	u16 poplmovl;
+	u32 val;
+	u16 int80;
+} __attribute__((packed)) retcode = {
+	0xb858,		/* popl %eax; movl $..., %eax */
+	__NR_sigreturn,
+	0x80cd,		/* int $0x80 */
+};
 
-/*
- * Set up a signal frame.
- */
+static const struct {
+	u8  movl;
+	u32 val;
+	u16 int80;
+	u8  pad;
+} __attribute__((packed)) rt_retcode = {
+	0xb8,		/* movl $..., %eax */
+	__NR_rt_sigreturn,
+	0x80cd,		/* int $0x80 */
+	0
+};
 
 /*
  * Determine which stack to use..
@@ -557,6 +389,265 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 
 	return 0;
 }
+#else /* !CONFIG_X86_32 */
+/*
+ * Determine which stack to use..
+ */
+static void __user *
+get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size)
+{
+	/* Default to using normal stack - redzone*/
+	sp -= 128;
+
+	/* This is the X/Open sanctioned signal stack switching.  */
+	if (ka->sa.sa_flags & SA_ONSTACK) {
+		if (sas_ss_flags(sp) == 0)
+			sp = current->sas_ss_sp + current->sas_ss_size;
+	}
+
+	return (void __user *)round_down(sp - size, 64);
+}
+
+static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			    sigset_t *set, struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	void __user *fp = NULL;
+	int err = 0;
+	struct task_struct *me = current;
+
+	if (used_math()) {
+		fp = get_stack(ka, regs->sp, sig_xstate_size);
+		frame = (void __user *)round_down(
+			(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
+
+		if (save_i387_xstate(fp) < 0)
+			return -EFAULT;
+	} else
+		frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8;
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		return -EFAULT;
+
+	if (ka->sa.sa_flags & SA_SIGINFO) {
+		if (copy_siginfo_to_user(&frame->info, info))
+			return -EFAULT;
+	}
+
+	/* Create the ucontext.  */
+	if (cpu_has_xsave)
+		err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+	else
+		err |= __put_user(0, &frame->uc.uc_flags);
+	err |= __put_user(0, &frame->uc.uc_link);
+	err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+	err |= __put_user(sas_ss_flags(regs->sp),
+			  &frame->uc.uc_stack.ss_flags);
+	err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+	/* Set up to return from userspace.  If provided, use a stub
+	   already in userspace.  */
+	/* x86-64 should always use SA_RESTORER. */
+	if (ka->sa.sa_flags & SA_RESTORER) {
+		err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
+	} else {
+		/* could use a vstub here */
+		return -EFAULT;
+	}
+
+	if (err)
+		return -EFAULT;
+
+	/* Set up registers for signal handler */
+	regs->di = sig;
+	/* In case the signal handler was declared without prototypes */
+	regs->ax = 0;
+
+	/* This also works for non SA_SIGINFO handlers because they expect the
+	   next argument after the signal number on the stack. */
+	regs->si = (unsigned long)&frame->info;
+	regs->dx = (unsigned long)&frame->uc;
+	regs->ip = (unsigned long) ka->sa.sa_handler;
+
+	regs->sp = (unsigned long)frame;
+
+	/* Set up the CS register to run signal handlers in 64-bit mode,
+	   even if the handler happens to be interrupting 32-bit code. */
+	regs->cs = __USER_CS;
+
+	return 0;
+}
+#endif /* CONFIG_X86_32 */
+
+/*
+ * Atomically swap in the new signal mask, and wait for a signal.
+ */
+asmlinkage int
+sys_sigsuspend(int history0, int history1, old_sigset_t mask)
+{
+	mask &= _BLOCKABLE;
+	spin_lock_irq(&current->sighand->siglock);
+	current->saved_sigmask = current->blocked;
+	siginitset(&current->blocked, mask);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	current->state = TASK_INTERRUPTIBLE;
+	schedule();
+	set_restore_sigmask();
+
+	return -ERESTARTNOHAND;
+}
+
+asmlinkage int
+sys_sigaction(int sig, const struct old_sigaction __user *act,
+	      struct old_sigaction __user *oact)
+{
+	struct k_sigaction new_ka, old_ka;
+	int ret;
+
+	if (act) {
+		old_sigset_t mask;
+
+		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
+		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+			return -EFAULT;
+
+		__get_user(new_ka.sa.sa_flags, &act->sa_flags);
+		__get_user(mask, &act->sa_mask);
+		siginitset(&new_ka.sa.sa_mask, mask);
+	}
+
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+	if (!ret && oact) {
+		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
+		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+			return -EFAULT;
+
+		__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+		__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+	}
+
+	return ret;
+}
+
+#ifdef CONFIG_X86_32
+asmlinkage int sys_sigaltstack(unsigned long bx)
+{
+	/*
+	 * This is needed to make gcc realize it doesn't own the
+	 * "struct pt_regs"
+	 */
+	struct pt_regs *regs = (struct pt_regs *)&bx;
+	const stack_t __user *uss = (const stack_t __user *)bx;
+	stack_t __user *uoss = (stack_t __user *)regs->cx;
+
+	return do_sigaltstack(uss, uoss, regs->sp);
+}
+#else /* !CONFIG_X86_32 */
+asmlinkage long
+sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+		struct pt_regs *regs)
+{
+	return do_sigaltstack(uss, uoss, regs->sp);
+}
+#endif /* CONFIG_X86_32 */
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
+{
+	struct sigframe __user *frame;
+	struct pt_regs *regs;
+	unsigned long ax;
+	sigset_t set;
+
+	regs = (struct pt_regs *) &__unused;
+	frame = (struct sigframe __user *)(regs->sp - 8);
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1
+		&& __copy_from_user(&set.sig[1], &frame->extramask,
+				    sizeof(frame->extramask))))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(regs, &frame->sc, &ax))
+		goto badframe;
+	return ax;
+
+badframe:
+	if (show_unhandled_signals && printk_ratelimit()) {
+		printk("%s%s[%d] bad frame in sigreturn frame:"
+			"%p ip:%lx sp:%lx oeax:%lx",
+		    task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
+		    current->comm, task_pid_nr(current), frame, regs->ip,
+		    regs->sp, regs->orig_ax);
+		print_vma_addr(" in ", regs->ip);
+		printk(KERN_CONT "\n");
+	}
+
+	force_sig(SIGSEGV, current);
+
+	return 0;
+}
+
+static long do_rt_sigreturn(struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	unsigned long ax;
+	sigset_t set;
+
+	frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+		goto badframe;
+
+	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
+		goto badframe;
+
+	return ax;
+
+badframe:
+	signal_fault(regs, frame, "rt_sigreturn");
+	return 0;
+}
+
+#ifdef CONFIG_X86_32
+asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+{
+	struct pt_regs *regs = (struct pt_regs *)&__unused;
+
+	return do_rt_sigreturn(regs);
+}
+#else /* !CONFIG_X86_32 */
+asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+{
+	return do_rt_sigreturn(regs);
+}
+#endif /* CONFIG_X86_32 */
 
 /*
  * OK, we're invoking a handler:
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 771c8fcc8b0d..2da7e6e60807 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -174,80 +174,212 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 	return err;
 }
 
+/*
+ * Set up a signal frame.
+ */
 #ifdef CONFIG_X86_32
-asmlinkage int sys_sigaltstack(unsigned long bx)
+static const struct {
+	u16 poplmovl;
+	u32 val;
+	u16 int80;
+} __attribute__((packed)) retcode = {
+	0xb858,		/* popl %eax; movl $..., %eax */
+	__NR_sigreturn,
+	0x80cd,		/* int $0x80 */
+};
+
+static const struct {
+	u8  movl;
+	u32 val;
+	u16 int80;
+	u8  pad;
+} __attribute__((packed)) rt_retcode = {
+	0xb8,		/* movl $..., %eax */
+	__NR_rt_sigreturn,
+	0x80cd,		/* int $0x80 */
+	0
+};
+
+/*
+ * Determine which stack to use..
+ */
+static inline void __user *
+get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
+	     void **fpstate)
 {
+	unsigned long sp;
+
+	/* Default to using normal stack */
+	sp = regs->sp;
+
 	/*
-	 * This is needed to make gcc realize it doesn't own the
-	 * "struct pt_regs"
+	 * If we are on the alternate signal stack and would overflow it, don't.
+	 * Return an always-bogus address instead so we will die with SIGSEGV.
 	 */
-	struct pt_regs *regs = (struct pt_regs *)&bx;
-	const stack_t __user *uss = (const stack_t __user *)bx;
-	stack_t __user *uoss = (stack_t __user *)regs->cx;
+	if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size)))
+		return (void __user *) -1L;
 
-	return do_sigaltstack(uss, uoss, regs->sp);
-}
-#else /* !CONFIG_X86_32 */
-asmlinkage long
-sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
-		struct pt_regs *regs)
-{
-	return do_sigaltstack(uss, uoss, regs->sp);
+	/* This is the X/Open sanctioned signal stack switching.  */
+	if (ka->sa.sa_flags & SA_ONSTACK) {
+		if (sas_ss_flags(sp) == 0)
+			sp = current->sas_ss_sp + current->sas_ss_size;
+	} else {
+		/* This is the legacy signal stack switching. */
+		if ((regs->ss & 0xffff) != __USER_DS &&
+			!(ka->sa.sa_flags & SA_RESTORER) &&
+				ka->sa.sa_restorer)
+			sp = (unsigned long) ka->sa.sa_restorer;
+	}
+
+	if (used_math()) {
+		sp = sp - sig_xstate_size;
+		*fpstate = (struct _fpstate *) sp;
+		if (save_i387_xstate(*fpstate) < 0)
+			return (void __user *)-1L;
+	}
+
+	sp -= frame_size;
+	/*
+	 * Align the stack pointer according to the i386 ABI,
+	 * i.e. so that on function entry ((sp + 4) & 15) == 0.
+	 */
+	sp = ((sp + 4) & -16ul) - 4;
+
+	return (void __user *) sp;
 }
-#endif /* CONFIG_X86_32 */
 
-/*
- * Do a signal return; undo the signal stack.
- */
-static long do_rt_sigreturn(struct pt_regs *regs)
+static int
+__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
+	      struct pt_regs *regs)
 {
-	struct rt_sigframe __user *frame;
-	unsigned long ax;
-	sigset_t set;
+	struct sigframe __user *frame;
+	void __user *restorer;
+	int err = 0;
+	void __user *fpstate = NULL;
 
-	frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
-		goto badframe;
+	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		return -EFAULT;
 
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
-		goto badframe;
+	if (__put_user(sig, &frame->sig))
+		return -EFAULT;
 
-	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
-		goto badframe;
+	if (setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
+		return -EFAULT;
 
-	return ax;
+	if (_NSIG_WORDS > 1) {
+		if (__copy_to_user(&frame->extramask, &set->sig[1],
+				   sizeof(frame->extramask)))
+			return -EFAULT;
+	}
+
+	if (current->mm->context.vdso)
+		restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn);
+	else
+		restorer = &frame->retcode;
+	if (ka->sa.sa_flags & SA_RESTORER)
+		restorer = ka->sa.sa_restorer;
+
+	/* Set up to return from userspace.  */
+	err |= __put_user(restorer, &frame->pretcode);
+
+	/*
+	 * This is popl %eax ; movl $__NR_sigreturn, %eax ; int $0x80
+	 *
+	 * WE DO NOT USE IT ANY MORE! It's only left here for historical
+	 * reasons and because gdb uses it as a signature to notice
+	 * signal handler stack frames.
+	 */
+	err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode);
+
+	if (err)
+		return -EFAULT;
+
+	/* Set up registers for signal handler */
+	regs->sp = (unsigned long)frame;
+	regs->ip = (unsigned long)ka->sa.sa_handler;
+	regs->ax = (unsigned long)sig;
+	regs->dx = 0;
+	regs->cx = 0;
+
+	regs->ds = __USER_DS;
+	regs->es = __USER_DS;
+	regs->ss = __USER_DS;
+	regs->cs = __USER_CS;
 
-badframe:
-	signal_fault(regs, frame, "rt_sigreturn");
 	return 0;
 }
 
-#ifdef CONFIG_X86_32
-asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			    sigset_t *set, struct pt_regs *regs)
 {
-	struct pt_regs *regs = (struct pt_regs *)&__unused;
+	struct rt_sigframe __user *frame;
+	void __user *restorer;
+	int err = 0;
+	void __user *fpstate = NULL;
 
-	return do_rt_sigreturn(regs);
-}
-#else /* !CONFIG_X86_32 */
-asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
-{
-	return do_rt_sigreturn(regs);
-}
-#endif /* CONFIG_X86_32 */
+	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
 
-/*
- * Set up a signal frame.
- */
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		return -EFAULT;
+
+	err |= __put_user(sig, &frame->sig);
+	err |= __put_user(&frame->info, &frame->pinfo);
+	err |= __put_user(&frame->uc, &frame->puc);
+	err |= copy_siginfo_to_user(&frame->info, info);
+	if (err)
+		return -EFAULT;
+
+	/* Create the ucontext.  */
+	if (cpu_has_xsave)
+		err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+	else
+		err |= __put_user(0, &frame->uc.uc_flags);
+	err |= __put_user(0, &frame->uc.uc_link);
+	err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+	err |= __put_user(sas_ss_flags(regs->sp),
+			  &frame->uc.uc_stack.ss_flags);
+	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
+				regs, set->sig[0]);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+	if (err)
+		return -EFAULT;
+
+	/* Set up to return from userspace.  */
+	restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
+	if (ka->sa.sa_flags & SA_RESTORER)
+		restorer = ka->sa.sa_restorer;
+	err |= __put_user(restorer, &frame->pretcode);
+
+	/*
+	 * This is movl $__NR_rt_sigreturn, %ax ; int $0x80
+	 *
+	 * WE DO NOT USE IT ANY MORE! It's only left here for historical
+	 * reasons and because gdb uses it as a signature to notice
+	 * signal handler stack frames.
+	 */
+	err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
 
+	if (err)
+		return -EFAULT;
+
+	/* Set up registers for signal handler */
+	regs->sp = (unsigned long)frame;
+	regs->ip = (unsigned long)ka->sa.sa_handler;
+	regs->ax = (unsigned long)sig;
+	regs->dx = (unsigned long)&frame->info;
+	regs->cx = (unsigned long)&frame->uc;
+
+	regs->ds = __USER_DS;
+	regs->es = __USER_DS;
+	regs->ss = __USER_DS;
+	regs->cs = __USER_CS;
+
+	return 0;
+}
+#else /* !CONFIG_X86_32 */
 /*
  * Determine which stack to use..
  */
@@ -337,6 +469,77 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 
 	return 0;
 }
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_32
+asmlinkage int sys_sigaltstack(unsigned long bx)
+{
+	/*
+	 * This is needed to make gcc realize it doesn't own the
+	 * "struct pt_regs"
+	 */
+	struct pt_regs *regs = (struct pt_regs *)&bx;
+	const stack_t __user *uss = (const stack_t __user *)bx;
+	stack_t __user *uoss = (stack_t __user *)regs->cx;
+
+	return do_sigaltstack(uss, uoss, regs->sp);
+}
+#else /* !CONFIG_X86_32 */
+asmlinkage long
+sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+		struct pt_regs *regs)
+{
+	return do_sigaltstack(uss, uoss, regs->sp);
+}
+#endif /* CONFIG_X86_32 */
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+static long do_rt_sigreturn(struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	unsigned long ax;
+	sigset_t set;
+
+	frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+		goto badframe;
+
+	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
+		goto badframe;
+
+	return ax;
+
+badframe:
+	signal_fault(regs, frame, "rt_sigreturn");
+	return 0;
+}
+
+#ifdef CONFIG_X86_32
+asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+{
+	struct pt_regs *regs = (struct pt_regs *)&__unused;
+
+	return do_rt_sigreturn(regs);
+}
+#else /* !CONFIG_X86_32 */
+asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+{
+	return do_rt_sigreturn(regs);
+}
+#endif /* CONFIG_X86_32 */
 
 /*
  * OK, we're invoking a handler
-- 
cgit v1.2.3-70-g09d2


From e5fa2d063cf2ca38eae5fb3469315db669d5c041 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 24 Nov 2008 18:24:11 -0800
Subject: x86: signal: unify signal_{32|64}.c, prepare

Impact: cleanup

Add #ifdef directive for 32-bit only code.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c |   6 +++
 arch/x86/kernel/signal_64.c | 116 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 121 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index e9f71298e746..b1f4d34e0a38 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -1,8 +1,10 @@
 /*
  *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
  *
  *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
  *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
+ *  2000-2002   x86-64 support by Andi Kleen
  */
 
 #include <linux/sched.h>
@@ -481,6 +483,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 }
 #endif /* CONFIG_X86_32 */
 
+#ifdef CONFIG_X86_32
 /*
  * Atomically swap in the new signal mask, and wait for a signal.
  */
@@ -535,6 +538,7 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
 
 	return ret;
 }
+#endif /* CONFIG_X86_32 */
 
 #ifdef CONFIG_X86_32
 asmlinkage int sys_sigaltstack(unsigned long bx)
@@ -561,6 +565,7 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 /*
  * Do a signal return; undo the signal stack.
  */
+#ifdef CONFIG_X86_32
 asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
 {
 	struct sigframe __user *frame;
@@ -603,6 +608,7 @@ badframe:
 
 	return 0;
 }
+#endif /* CONFIG_X86_32 */
 
 static long do_rt_sigreturn(struct pt_regs *regs)
 {
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 2da7e6e60807..b1f4d34e0a38 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -54,12 +54,24 @@
 	err |= __get_user(regs->x, &sc->x);	\
 }
 
+#define COPY_SEG(seg)		{			\
+		unsigned short tmp;			\
+		err |= __get_user(tmp, &sc->seg);	\
+		regs->seg = tmp;			\
+}
+
 #define COPY_SEG_CPL3(seg)	{			\
 		unsigned short tmp;			\
 		err |= __get_user(tmp, &sc->seg);	\
 		regs->seg = tmp | 3;			\
 }
 
+#define GET_SEG(seg)		{			\
+		unsigned short tmp;			\
+		err |= __get_user(tmp, &sc->seg);	\
+		loadsegment(seg, tmp);			\
+}
+
 static int
 restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 		   unsigned long *pax)
@@ -471,6 +483,63 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 }
 #endif /* CONFIG_X86_32 */
 
+#ifdef CONFIG_X86_32
+/*
+ * Atomically swap in the new signal mask, and wait for a signal.
+ */
+asmlinkage int
+sys_sigsuspend(int history0, int history1, old_sigset_t mask)
+{
+	mask &= _BLOCKABLE;
+	spin_lock_irq(&current->sighand->siglock);
+	current->saved_sigmask = current->blocked;
+	siginitset(&current->blocked, mask);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	current->state = TASK_INTERRUPTIBLE;
+	schedule();
+	set_restore_sigmask();
+
+	return -ERESTARTNOHAND;
+}
+
+asmlinkage int
+sys_sigaction(int sig, const struct old_sigaction __user *act,
+	      struct old_sigaction __user *oact)
+{
+	struct k_sigaction new_ka, old_ka;
+	int ret;
+
+	if (act) {
+		old_sigset_t mask;
+
+		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
+		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+			return -EFAULT;
+
+		__get_user(new_ka.sa.sa_flags, &act->sa_flags);
+		__get_user(mask, &act->sa_mask);
+		siginitset(&new_ka.sa.sa_mask, mask);
+	}
+
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+	if (!ret && oact) {
+		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
+		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+			return -EFAULT;
+
+		__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+		__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+	}
+
+	return ret;
+}
+#endif /* CONFIG_X86_32 */
+
 #ifdef CONFIG_X86_32
 asmlinkage int sys_sigaltstack(unsigned long bx)
 {
@@ -496,6 +565,51 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 /*
  * Do a signal return; undo the signal stack.
  */
+#ifdef CONFIG_X86_32
+asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
+{
+	struct sigframe __user *frame;
+	struct pt_regs *regs;
+	unsigned long ax;
+	sigset_t set;
+
+	regs = (struct pt_regs *) &__unused;
+	frame = (struct sigframe __user *)(regs->sp - 8);
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1
+		&& __copy_from_user(&set.sig[1], &frame->extramask,
+				    sizeof(frame->extramask))))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(regs, &frame->sc, &ax))
+		goto badframe;
+	return ax;
+
+badframe:
+	if (show_unhandled_signals && printk_ratelimit()) {
+		printk("%s%s[%d] bad frame in sigreturn frame:"
+			"%p ip:%lx sp:%lx oeax:%lx",
+		    task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
+		    current->comm, task_pid_nr(current), frame, regs->ip,
+		    regs->sp, regs->orig_ax);
+		print_vma_addr(" in ", regs->ip);
+		printk(KERN_CONT "\n");
+	}
+
+	force_sig(SIGSEGV, current);
+
+	return 0;
+}
+#endif /* CONFIG_X86_32 */
+
 static long do_rt_sigreturn(struct pt_regs *regs)
 {
 	struct rt_sigframe __user *frame;
@@ -542,7 +656,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
 #endif /* CONFIG_X86_32 */
 
 /*
- * OK, we're invoking a handler
+ * OK, we're invoking a handler:
  */
 static int signr_convert(int sig)
 {
-- 
cgit v1.2.3-70-g09d2


From 5ceb40da9bacc8b056805d72efb1a52502d56b6b Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 24 Nov 2008 18:24:11 -0800
Subject: x86: signal: unify signal_{32|64}.c

Impact: cleanup

Unify signal_{32|64}.c! Mechanic unification - the two
files are the same.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile    |   2 +-
 arch/x86/kernel/signal.c    | 915 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/signal_32.c | 915 --------------------------------------------
 arch/x86/kernel/signal_64.c | 915 --------------------------------------------
 4 files changed, 916 insertions(+), 1831 deletions(-)
 create mode 100644 arch/x86/kernel/signal.c
 delete mode 100644 arch/x86/kernel/signal_32.c
 delete mode 100644 arch/x86/kernel/signal_64.c

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d7e5a58ee22f..ef28c210ebf8 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -22,7 +22,7 @@ CFLAGS_vsyscall_64.o	:= $(PROFILING) -g0 $(nostackp)
 CFLAGS_hpet.o		:= $(nostackp)
 CFLAGS_tsc.o		:= $(nostackp)
 
-obj-y			:= process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
+obj-y			:= process_$(BITS).o signal.o entry_$(BITS).o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time_$(BITS).o ioport.o ldt.o
 obj-y			+= setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
new file mode 100644
index 000000000000..b1f4d34e0a38
--- /dev/null
+++ b/arch/x86/kernel/signal.c
@@ -0,0 +1,915 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
+ *
+ *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
+ *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
+ *  2000-2002   x86-64 support by Andi Kleen
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/tracehook.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/personality.h>
+#include <linux/uaccess.h>
+
+#include <asm/processor.h>
+#include <asm/ucontext.h>
+#include <asm/i387.h>
+#include <asm/vdso.h>
+
+#ifdef CONFIG_X86_64
+#include <asm/proto.h>
+#include <asm/ia32_unistd.h>
+#include <asm/mce.h>
+#endif /* CONFIG_X86_64 */
+
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
+
+#include "sigframe.h"
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+#define __FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_OF | \
+			 X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
+			 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
+			 X86_EFLAGS_CF)
+
+#ifdef CONFIG_X86_32
+# define FIX_EFLAGS	(__FIX_EFLAGS | X86_EFLAGS_RF)
+#else
+# define FIX_EFLAGS	__FIX_EFLAGS
+#endif
+
+#define COPY(x)			{		\
+	err |= __get_user(regs->x, &sc->x);	\
+}
+
+#define COPY_SEG(seg)		{			\
+		unsigned short tmp;			\
+		err |= __get_user(tmp, &sc->seg);	\
+		regs->seg = tmp;			\
+}
+
+#define COPY_SEG_CPL3(seg)	{			\
+		unsigned short tmp;			\
+		err |= __get_user(tmp, &sc->seg);	\
+		regs->seg = tmp | 3;			\
+}
+
+#define GET_SEG(seg)		{			\
+		unsigned short tmp;			\
+		err |= __get_user(tmp, &sc->seg);	\
+		loadsegment(seg, tmp);			\
+}
+
+static int
+restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
+		   unsigned long *pax)
+{
+	void __user *buf;
+	unsigned int tmpflags;
+	unsigned int err = 0;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+#ifdef CONFIG_X86_32
+	GET_SEG(gs);
+	COPY_SEG(fs);
+	COPY_SEG(es);
+	COPY_SEG(ds);
+#endif /* CONFIG_X86_32 */
+
+	COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
+	COPY(dx); COPY(cx); COPY(ip);
+
+#ifdef CONFIG_X86_64
+	COPY(r8);
+	COPY(r9);
+	COPY(r10);
+	COPY(r11);
+	COPY(r12);
+	COPY(r13);
+	COPY(r14);
+	COPY(r15);
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_X86_32
+	COPY_SEG_CPL3(cs);
+	COPY_SEG_CPL3(ss);
+#else /* !CONFIG_X86_32 */
+	/* Kernel saves and restores only the CS segment register on signals,
+	 * which is the bare minimum needed to allow mixed 32/64-bit code.
+	 * App's signal handler can save/restore other segments if needed. */
+	COPY_SEG_CPL3(cs);
+#endif /* CONFIG_X86_32 */
+
+	err |= __get_user(tmpflags, &sc->flags);
+	regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
+	regs->orig_ax = -1;		/* disable syscall checks */
+
+	err |= __get_user(buf, &sc->fpstate);
+	err |= restore_i387_xstate(buf);
+
+	err |= __get_user(*pax, &sc->ax);
+	return err;
+}
+
+static int
+setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
+		 struct pt_regs *regs, unsigned long mask)
+{
+	int err = 0;
+
+#ifdef CONFIG_X86_32
+	{
+		unsigned int tmp;
+
+		savesegment(gs, tmp);
+		err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
+	}
+	err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
+	err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
+	err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
+#endif /* CONFIG_X86_32 */
+
+	err |= __put_user(regs->di, &sc->di);
+	err |= __put_user(regs->si, &sc->si);
+	err |= __put_user(regs->bp, &sc->bp);
+	err |= __put_user(regs->sp, &sc->sp);
+	err |= __put_user(regs->bx, &sc->bx);
+	err |= __put_user(regs->dx, &sc->dx);
+	err |= __put_user(regs->cx, &sc->cx);
+	err |= __put_user(regs->ax, &sc->ax);
+#ifdef CONFIG_X86_64
+	err |= __put_user(regs->r8, &sc->r8);
+	err |= __put_user(regs->r9, &sc->r9);
+	err |= __put_user(regs->r10, &sc->r10);
+	err |= __put_user(regs->r11, &sc->r11);
+	err |= __put_user(regs->r12, &sc->r12);
+	err |= __put_user(regs->r13, &sc->r13);
+	err |= __put_user(regs->r14, &sc->r14);
+	err |= __put_user(regs->r15, &sc->r15);
+#endif /* CONFIG_X86_64 */
+
+	err |= __put_user(current->thread.trap_no, &sc->trapno);
+	err |= __put_user(current->thread.error_code, &sc->err);
+	err |= __put_user(regs->ip, &sc->ip);
+#ifdef CONFIG_X86_32
+	err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
+	err |= __put_user(regs->flags, &sc->flags);
+	err |= __put_user(regs->sp, &sc->sp_at_signal);
+	err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
+#else /* !CONFIG_X86_32 */
+	err |= __put_user(regs->flags, &sc->flags);
+	err |= __put_user(regs->cs, &sc->cs);
+	err |= __put_user(0, &sc->gs);
+	err |= __put_user(0, &sc->fs);
+#endif /* CONFIG_X86_32 */
+
+	err |= __put_user(fpstate, &sc->fpstate);
+
+	/* non-iBCS2 extensions.. */
+	err |= __put_user(mask, &sc->oldmask);
+	err |= __put_user(current->thread.cr2, &sc->cr2);
+
+	return err;
+}
+
+/*
+ * Set up a signal frame.
+ */
+#ifdef CONFIG_X86_32
+static const struct {
+	u16 poplmovl;
+	u32 val;
+	u16 int80;
+} __attribute__((packed)) retcode = {
+	0xb858,		/* popl %eax; movl $..., %eax */
+	__NR_sigreturn,
+	0x80cd,		/* int $0x80 */
+};
+
+static const struct {
+	u8  movl;
+	u32 val;
+	u16 int80;
+	u8  pad;
+} __attribute__((packed)) rt_retcode = {
+	0xb8,		/* movl $..., %eax */
+	__NR_rt_sigreturn,
+	0x80cd,		/* int $0x80 */
+	0
+};
+
+/*
+ * Determine which stack to use..
+ */
+static inline void __user *
+get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
+	     void **fpstate)
+{
+	unsigned long sp;
+
+	/* Default to using normal stack */
+	sp = regs->sp;
+
+	/*
+	 * If we are on the alternate signal stack and would overflow it, don't.
+	 * Return an always-bogus address instead so we will die with SIGSEGV.
+	 */
+	if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size)))
+		return (void __user *) -1L;
+
+	/* This is the X/Open sanctioned signal stack switching.  */
+	if (ka->sa.sa_flags & SA_ONSTACK) {
+		if (sas_ss_flags(sp) == 0)
+			sp = current->sas_ss_sp + current->sas_ss_size;
+	} else {
+		/* This is the legacy signal stack switching. */
+		if ((regs->ss & 0xffff) != __USER_DS &&
+			!(ka->sa.sa_flags & SA_RESTORER) &&
+				ka->sa.sa_restorer)
+			sp = (unsigned long) ka->sa.sa_restorer;
+	}
+
+	if (used_math()) {
+		sp = sp - sig_xstate_size;
+		*fpstate = (struct _fpstate *) sp;
+		if (save_i387_xstate(*fpstate) < 0)
+			return (void __user *)-1L;
+	}
+
+	sp -= frame_size;
+	/*
+	 * Align the stack pointer according to the i386 ABI,
+	 * i.e. so that on function entry ((sp + 4) & 15) == 0.
+	 */
+	sp = ((sp + 4) & -16ul) - 4;
+
+	return (void __user *) sp;
+}
+
+static int
+__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
+	      struct pt_regs *regs)
+{
+	struct sigframe __user *frame;
+	void __user *restorer;
+	int err = 0;
+	void __user *fpstate = NULL;
+
+	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		return -EFAULT;
+
+	if (__put_user(sig, &frame->sig))
+		return -EFAULT;
+
+	if (setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
+		return -EFAULT;
+
+	if (_NSIG_WORDS > 1) {
+		if (__copy_to_user(&frame->extramask, &set->sig[1],
+				   sizeof(frame->extramask)))
+			return -EFAULT;
+	}
+
+	if (current->mm->context.vdso)
+		restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn);
+	else
+		restorer = &frame->retcode;
+	if (ka->sa.sa_flags & SA_RESTORER)
+		restorer = ka->sa.sa_restorer;
+
+	/* Set up to return from userspace.  */
+	err |= __put_user(restorer, &frame->pretcode);
+
+	/*
+	 * This is popl %eax ; movl $__NR_sigreturn, %eax ; int $0x80
+	 *
+	 * WE DO NOT USE IT ANY MORE! It's only left here for historical
+	 * reasons and because gdb uses it as a signature to notice
+	 * signal handler stack frames.
+	 */
+	err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode);
+
+	if (err)
+		return -EFAULT;
+
+	/* Set up registers for signal handler */
+	regs->sp = (unsigned long)frame;
+	regs->ip = (unsigned long)ka->sa.sa_handler;
+	regs->ax = (unsigned long)sig;
+	regs->dx = 0;
+	regs->cx = 0;
+
+	regs->ds = __USER_DS;
+	regs->es = __USER_DS;
+	regs->ss = __USER_DS;
+	regs->cs = __USER_CS;
+
+	return 0;
+}
+
+static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			    sigset_t *set, struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	void __user *restorer;
+	int err = 0;
+	void __user *fpstate = NULL;
+
+	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		return -EFAULT;
+
+	err |= __put_user(sig, &frame->sig);
+	err |= __put_user(&frame->info, &frame->pinfo);
+	err |= __put_user(&frame->uc, &frame->puc);
+	err |= copy_siginfo_to_user(&frame->info, info);
+	if (err)
+		return -EFAULT;
+
+	/* Create the ucontext.  */
+	if (cpu_has_xsave)
+		err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+	else
+		err |= __put_user(0, &frame->uc.uc_flags);
+	err |= __put_user(0, &frame->uc.uc_link);
+	err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+	err |= __put_user(sas_ss_flags(regs->sp),
+			  &frame->uc.uc_stack.ss_flags);
+	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
+				regs, set->sig[0]);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+	if (err)
+		return -EFAULT;
+
+	/* Set up to return from userspace.  */
+	restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
+	if (ka->sa.sa_flags & SA_RESTORER)
+		restorer = ka->sa.sa_restorer;
+	err |= __put_user(restorer, &frame->pretcode);
+
+	/*
+	 * This is movl $__NR_rt_sigreturn, %ax ; int $0x80
+	 *
+	 * WE DO NOT USE IT ANY MORE! It's only left here for historical
+	 * reasons and because gdb uses it as a signature to notice
+	 * signal handler stack frames.
+	 */
+	err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
+
+	if (err)
+		return -EFAULT;
+
+	/* Set up registers for signal handler */
+	regs->sp = (unsigned long)frame;
+	regs->ip = (unsigned long)ka->sa.sa_handler;
+	regs->ax = (unsigned long)sig;
+	regs->dx = (unsigned long)&frame->info;
+	regs->cx = (unsigned long)&frame->uc;
+
+	regs->ds = __USER_DS;
+	regs->es = __USER_DS;
+	regs->ss = __USER_DS;
+	regs->cs = __USER_CS;
+
+	return 0;
+}
+#else /* !CONFIG_X86_32 */
+/*
+ * Determine which stack to use..
+ */
+static void __user *
+get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size)
+{
+	/* Default to using normal stack - redzone*/
+	sp -= 128;
+
+	/* This is the X/Open sanctioned signal stack switching.  */
+	if (ka->sa.sa_flags & SA_ONSTACK) {
+		if (sas_ss_flags(sp) == 0)
+			sp = current->sas_ss_sp + current->sas_ss_size;
+	}
+
+	return (void __user *)round_down(sp - size, 64);
+}
+
+static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			    sigset_t *set, struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	void __user *fp = NULL;
+	int err = 0;
+	struct task_struct *me = current;
+
+	if (used_math()) {
+		fp = get_stack(ka, regs->sp, sig_xstate_size);
+		frame = (void __user *)round_down(
+			(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
+
+		if (save_i387_xstate(fp) < 0)
+			return -EFAULT;
+	} else
+		frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8;
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		return -EFAULT;
+
+	if (ka->sa.sa_flags & SA_SIGINFO) {
+		if (copy_siginfo_to_user(&frame->info, info))
+			return -EFAULT;
+	}
+
+	/* Create the ucontext.  */
+	if (cpu_has_xsave)
+		err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+	else
+		err |= __put_user(0, &frame->uc.uc_flags);
+	err |= __put_user(0, &frame->uc.uc_link);
+	err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+	err |= __put_user(sas_ss_flags(regs->sp),
+			  &frame->uc.uc_stack.ss_flags);
+	err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+	/* Set up to return from userspace.  If provided, use a stub
+	   already in userspace.  */
+	/* x86-64 should always use SA_RESTORER. */
+	if (ka->sa.sa_flags & SA_RESTORER) {
+		err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
+	} else {
+		/* could use a vstub here */
+		return -EFAULT;
+	}
+
+	if (err)
+		return -EFAULT;
+
+	/* Set up registers for signal handler */
+	regs->di = sig;
+	/* In case the signal handler was declared without prototypes */
+	regs->ax = 0;
+
+	/* This also works for non SA_SIGINFO handlers because they expect the
+	   next argument after the signal number on the stack. */
+	regs->si = (unsigned long)&frame->info;
+	regs->dx = (unsigned long)&frame->uc;
+	regs->ip = (unsigned long) ka->sa.sa_handler;
+
+	regs->sp = (unsigned long)frame;
+
+	/* Set up the CS register to run signal handlers in 64-bit mode,
+	   even if the handler happens to be interrupting 32-bit code. */
+	regs->cs = __USER_CS;
+
+	return 0;
+}
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_32
+/*
+ * Atomically swap in the new signal mask, and wait for a signal.
+ */
+asmlinkage int
+sys_sigsuspend(int history0, int history1, old_sigset_t mask)
+{
+	mask &= _BLOCKABLE;
+	spin_lock_irq(&current->sighand->siglock);
+	current->saved_sigmask = current->blocked;
+	siginitset(&current->blocked, mask);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	current->state = TASK_INTERRUPTIBLE;
+	schedule();
+	set_restore_sigmask();
+
+	return -ERESTARTNOHAND;
+}
+
+asmlinkage int
+sys_sigaction(int sig, const struct old_sigaction __user *act,
+	      struct old_sigaction __user *oact)
+{
+	struct k_sigaction new_ka, old_ka;
+	int ret;
+
+	if (act) {
+		old_sigset_t mask;
+
+		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
+		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+			return -EFAULT;
+
+		__get_user(new_ka.sa.sa_flags, &act->sa_flags);
+		__get_user(mask, &act->sa_mask);
+		siginitset(&new_ka.sa.sa_mask, mask);
+	}
+
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+	if (!ret && oact) {
+		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
+		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+			return -EFAULT;
+
+		__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+		__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+	}
+
+	return ret;
+}
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_32
+asmlinkage int sys_sigaltstack(unsigned long bx)
+{
+	/*
+	 * This is needed to make gcc realize it doesn't own the
+	 * "struct pt_regs"
+	 */
+	struct pt_regs *regs = (struct pt_regs *)&bx;
+	const stack_t __user *uss = (const stack_t __user *)bx;
+	stack_t __user *uoss = (stack_t __user *)regs->cx;
+
+	return do_sigaltstack(uss, uoss, regs->sp);
+}
+#else /* !CONFIG_X86_32 */
+asmlinkage long
+sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+		struct pt_regs *regs)
+{
+	return do_sigaltstack(uss, uoss, regs->sp);
+}
+#endif /* CONFIG_X86_32 */
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+#ifdef CONFIG_X86_32
+asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
+{
+	struct sigframe __user *frame;
+	struct pt_regs *regs;
+	unsigned long ax;
+	sigset_t set;
+
+	regs = (struct pt_regs *) &__unused;
+	frame = (struct sigframe __user *)(regs->sp - 8);
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1
+		&& __copy_from_user(&set.sig[1], &frame->extramask,
+				    sizeof(frame->extramask))))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(regs, &frame->sc, &ax))
+		goto badframe;
+	return ax;
+
+badframe:
+	if (show_unhandled_signals && printk_ratelimit()) {
+		printk("%s%s[%d] bad frame in sigreturn frame:"
+			"%p ip:%lx sp:%lx oeax:%lx",
+		    task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
+		    current->comm, task_pid_nr(current), frame, regs->ip,
+		    regs->sp, regs->orig_ax);
+		print_vma_addr(" in ", regs->ip);
+		printk(KERN_CONT "\n");
+	}
+
+	force_sig(SIGSEGV, current);
+
+	return 0;
+}
+#endif /* CONFIG_X86_32 */
+
+static long do_rt_sigreturn(struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	unsigned long ax;
+	sigset_t set;
+
+	frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+		goto badframe;
+
+	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
+		goto badframe;
+
+	return ax;
+
+badframe:
+	signal_fault(regs, frame, "rt_sigreturn");
+	return 0;
+}
+
+#ifdef CONFIG_X86_32
+asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+{
+	struct pt_regs *regs = (struct pt_regs *)&__unused;
+
+	return do_rt_sigreturn(regs);
+}
+#else /* !CONFIG_X86_32 */
+asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+{
+	return do_rt_sigreturn(regs);
+}
+#endif /* CONFIG_X86_32 */
+
+/*
+ * OK, we're invoking a handler:
+ */
+static int signr_convert(int sig)
+{
+#ifdef CONFIG_X86_32
+	struct thread_info *info = current_thread_info();
+
+	if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32)
+		return info->exec_domain->signal_invmap[sig];
+#endif /* CONFIG_X86_32 */
+	return sig;
+}
+
+#ifdef CONFIG_X86_32
+
+#define is_ia32	1
+#define ia32_setup_frame	__setup_frame
+#define ia32_setup_rt_frame	__setup_rt_frame
+
+#else /* !CONFIG_X86_32 */
+
+#ifdef CONFIG_IA32_EMULATION
+#define is_ia32	test_thread_flag(TIF_IA32)
+#else /* !CONFIG_IA32_EMULATION */
+#define is_ia32	0
+#endif /* CONFIG_IA32_EMULATION */
+
+#endif /* CONFIG_X86_32 */
+
+static int
+setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+	       sigset_t *set, struct pt_regs *regs)
+{
+	int usig = signr_convert(sig);
+	int ret;
+
+	/* Set up the stack frame */
+	if (is_ia32) {
+		if (ka->sa.sa_flags & SA_SIGINFO)
+			ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
+		else
+			ret = ia32_setup_frame(usig, ka, set, regs);
+	} else
+		ret = __setup_rt_frame(sig, ka, info, set, regs);
+
+	if (ret) {
+		force_sigsegv(sig, current);
+		return -EFAULT;
+	}
+
+	return ret;
+}
+
+static int
+handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
+	      sigset_t *oldset, struct pt_regs *regs)
+{
+	int ret;
+
+	/* Are we from a system call? */
+	if (syscall_get_nr(current, regs) >= 0) {
+		/* If so, check system call restarting.. */
+		switch (syscall_get_error(current, regs)) {
+		case -ERESTART_RESTARTBLOCK:
+		case -ERESTARTNOHAND:
+			regs->ax = -EINTR;
+			break;
+
+		case -ERESTARTSYS:
+			if (!(ka->sa.sa_flags & SA_RESTART)) {
+				regs->ax = -EINTR;
+				break;
+			}
+		/* fallthrough */
+		case -ERESTARTNOINTR:
+			regs->ax = regs->orig_ax;
+			regs->ip -= 2;
+			break;
+		}
+	}
+
+	/*
+	 * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF
+	 * flag so that register information in the sigcontext is correct.
+	 */
+	if (unlikely(regs->flags & X86_EFLAGS_TF) &&
+	    likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
+		regs->flags &= ~X86_EFLAGS_TF;
+
+	ret = setup_rt_frame(sig, ka, info, oldset, regs);
+
+	if (ret)
+		return ret;
+
+#ifdef CONFIG_X86_64
+	/*
+	 * This has nothing to do with segment registers,
+	 * despite the name.  This magic affects uaccess.h
+	 * macros' behavior.  Reset it to the normal setting.
+	 */
+	set_fs(USER_DS);
+#endif
+
+	/*
+	 * Clear the direction flag as per the ABI for function entry.
+	 */
+	regs->flags &= ~X86_EFLAGS_DF;
+
+	/*
+	 * Clear TF when entering the signal handler, but
+	 * notify any tracer that was single-stepping it.
+	 * The tracer may want to single-step inside the
+	 * handler too.
+	 */
+	regs->flags &= ~X86_EFLAGS_TF;
+
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
+		sigaddset(&current->blocked, sig);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	tracehook_signal_handler(sig, info, ka, regs,
+				 test_thread_flag(TIF_SINGLESTEP));
+
+	return 0;
+}
+
+#ifdef CONFIG_X86_32
+#define NR_restart_syscall	__NR_restart_syscall
+#else /* !CONFIG_X86_32 */
+#define NR_restart_syscall	\
+	test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
+#endif /* CONFIG_X86_32 */
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ */
+static void do_signal(struct pt_regs *regs)
+{
+	struct k_sigaction ka;
+	siginfo_t info;
+	int signr;
+	sigset_t *oldset;
+
+	/*
+	 * We want the common case to go fast, which is why we may in certain
+	 * cases get here from kernel mode. Just return without doing anything
+	 * if so.
+	 * X86_32: vm86 regs switched out by assembly code before reaching
+	 * here, so testing against kernel CS suffices.
+	 */
+	if (!user_mode(regs))
+		return;
+
+	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
+		oldset = &current->saved_sigmask;
+	else
+		oldset = &current->blocked;
+
+	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+	if (signr > 0) {
+		/*
+		 * Re-enable any watchpoints before delivering the
+		 * signal to user space. The processor register will
+		 * have been cleared if the watchpoint triggered
+		 * inside the kernel.
+		 */
+		if (current->thread.debugreg7)
+			set_debugreg(current->thread.debugreg7, 7);
+
+		/* Whee! Actually deliver the signal.  */
+		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
+			/*
+			 * A signal was successfully delivered; the saved
+			 * sigmask will have been stored in the signal frame,
+			 * and will be restored by sigreturn, so we can simply
+			 * clear the TS_RESTORE_SIGMASK flag.
+			 */
+			current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+		}
+		return;
+	}
+
+	/* Did we come from a system call? */
+	if (syscall_get_nr(current, regs) >= 0) {
+		/* Restart the system call - no handlers present */
+		switch (syscall_get_error(current, regs)) {
+		case -ERESTARTNOHAND:
+		case -ERESTARTSYS:
+		case -ERESTARTNOINTR:
+			regs->ax = regs->orig_ax;
+			regs->ip -= 2;
+			break;
+
+		case -ERESTART_RESTARTBLOCK:
+			regs->ax = NR_restart_syscall;
+			regs->ip -= 2;
+			break;
+		}
+	}
+
+	/*
+	 * If there's no signal to deliver, we just put the saved sigmask
+	 * back.
+	 */
+	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
+		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+	}
+}
+
+/*
+ * notification of userspace execution resumption
+ * - triggered by the TIF_WORK_MASK flags
+ */
+void
+do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
+{
+#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+	/* notify userspace of pending MCEs */
+	if (thread_info_flags & _TIF_MCE_NOTIFY)
+		mce_notify_user();
+#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
+
+	/* deal with pending signal delivery */
+	if (thread_info_flags & _TIF_SIGPENDING)
+		do_signal(regs);
+
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+	}
+
+#ifdef CONFIG_X86_32
+	clear_thread_flag(TIF_IRET);
+#endif /* CONFIG_X86_32 */
+}
+
+void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
+{
+	struct task_struct *me = current;
+
+	if (show_unhandled_signals && printk_ratelimit()) {
+		printk(KERN_INFO
+		       "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
+		       me->comm, me->pid, where, frame,
+		       regs->ip, regs->sp, regs->orig_ax);
+		print_vma_addr(" in ", regs->ip);
+		printk(KERN_CONT "\n");
+	}
+
+	force_sig(SIGSEGV, me);
+}
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
deleted file mode 100644
index b1f4d34e0a38..000000000000
--- a/arch/x86/kernel/signal_32.c
+++ /dev/null
@@ -1,915 +0,0 @@
-/*
- *  Copyright (C) 1991, 1992  Linus Torvalds
- *  Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
- *
- *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
- *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
- *  2000-2002   x86-64 support by Andi Kleen
- */
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/errno.h>
-#include <linux/wait.h>
-#include <linux/ptrace.h>
-#include <linux/tracehook.h>
-#include <linux/unistd.h>
-#include <linux/stddef.h>
-#include <linux/personality.h>
-#include <linux/uaccess.h>
-
-#include <asm/processor.h>
-#include <asm/ucontext.h>
-#include <asm/i387.h>
-#include <asm/vdso.h>
-
-#ifdef CONFIG_X86_64
-#include <asm/proto.h>
-#include <asm/ia32_unistd.h>
-#include <asm/mce.h>
-#endif /* CONFIG_X86_64 */
-
-#include <asm/syscall.h>
-#include <asm/syscalls.h>
-
-#include "sigframe.h"
-
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
-#define __FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_OF | \
-			 X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
-			 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
-			 X86_EFLAGS_CF)
-
-#ifdef CONFIG_X86_32
-# define FIX_EFLAGS	(__FIX_EFLAGS | X86_EFLAGS_RF)
-#else
-# define FIX_EFLAGS	__FIX_EFLAGS
-#endif
-
-#define COPY(x)			{		\
-	err |= __get_user(regs->x, &sc->x);	\
-}
-
-#define COPY_SEG(seg)		{			\
-		unsigned short tmp;			\
-		err |= __get_user(tmp, &sc->seg);	\
-		regs->seg = tmp;			\
-}
-
-#define COPY_SEG_CPL3(seg)	{			\
-		unsigned short tmp;			\
-		err |= __get_user(tmp, &sc->seg);	\
-		regs->seg = tmp | 3;			\
-}
-
-#define GET_SEG(seg)		{			\
-		unsigned short tmp;			\
-		err |= __get_user(tmp, &sc->seg);	\
-		loadsegment(seg, tmp);			\
-}
-
-static int
-restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
-		   unsigned long *pax)
-{
-	void __user *buf;
-	unsigned int tmpflags;
-	unsigned int err = 0;
-
-	/* Always make any pending restarted system calls return -EINTR */
-	current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
-#ifdef CONFIG_X86_32
-	GET_SEG(gs);
-	COPY_SEG(fs);
-	COPY_SEG(es);
-	COPY_SEG(ds);
-#endif /* CONFIG_X86_32 */
-
-	COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
-	COPY(dx); COPY(cx); COPY(ip);
-
-#ifdef CONFIG_X86_64
-	COPY(r8);
-	COPY(r9);
-	COPY(r10);
-	COPY(r11);
-	COPY(r12);
-	COPY(r13);
-	COPY(r14);
-	COPY(r15);
-#endif /* CONFIG_X86_64 */
-
-#ifdef CONFIG_X86_32
-	COPY_SEG_CPL3(cs);
-	COPY_SEG_CPL3(ss);
-#else /* !CONFIG_X86_32 */
-	/* Kernel saves and restores only the CS segment register on signals,
-	 * which is the bare minimum needed to allow mixed 32/64-bit code.
-	 * App's signal handler can save/restore other segments if needed. */
-	COPY_SEG_CPL3(cs);
-#endif /* CONFIG_X86_32 */
-
-	err |= __get_user(tmpflags, &sc->flags);
-	regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
-	regs->orig_ax = -1;		/* disable syscall checks */
-
-	err |= __get_user(buf, &sc->fpstate);
-	err |= restore_i387_xstate(buf);
-
-	err |= __get_user(*pax, &sc->ax);
-	return err;
-}
-
-static int
-setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
-		 struct pt_regs *regs, unsigned long mask)
-{
-	int err = 0;
-
-#ifdef CONFIG_X86_32
-	{
-		unsigned int tmp;
-
-		savesegment(gs, tmp);
-		err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
-	}
-	err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
-	err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
-	err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
-#endif /* CONFIG_X86_32 */
-
-	err |= __put_user(regs->di, &sc->di);
-	err |= __put_user(regs->si, &sc->si);
-	err |= __put_user(regs->bp, &sc->bp);
-	err |= __put_user(regs->sp, &sc->sp);
-	err |= __put_user(regs->bx, &sc->bx);
-	err |= __put_user(regs->dx, &sc->dx);
-	err |= __put_user(regs->cx, &sc->cx);
-	err |= __put_user(regs->ax, &sc->ax);
-#ifdef CONFIG_X86_64
-	err |= __put_user(regs->r8, &sc->r8);
-	err |= __put_user(regs->r9, &sc->r9);
-	err |= __put_user(regs->r10, &sc->r10);
-	err |= __put_user(regs->r11, &sc->r11);
-	err |= __put_user(regs->r12, &sc->r12);
-	err |= __put_user(regs->r13, &sc->r13);
-	err |= __put_user(regs->r14, &sc->r14);
-	err |= __put_user(regs->r15, &sc->r15);
-#endif /* CONFIG_X86_64 */
-
-	err |= __put_user(current->thread.trap_no, &sc->trapno);
-	err |= __put_user(current->thread.error_code, &sc->err);
-	err |= __put_user(regs->ip, &sc->ip);
-#ifdef CONFIG_X86_32
-	err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
-	err |= __put_user(regs->flags, &sc->flags);
-	err |= __put_user(regs->sp, &sc->sp_at_signal);
-	err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
-#else /* !CONFIG_X86_32 */
-	err |= __put_user(regs->flags, &sc->flags);
-	err |= __put_user(regs->cs, &sc->cs);
-	err |= __put_user(0, &sc->gs);
-	err |= __put_user(0, &sc->fs);
-#endif /* CONFIG_X86_32 */
-
-	err |= __put_user(fpstate, &sc->fpstate);
-
-	/* non-iBCS2 extensions.. */
-	err |= __put_user(mask, &sc->oldmask);
-	err |= __put_user(current->thread.cr2, &sc->cr2);
-
-	return err;
-}
-
-/*
- * Set up a signal frame.
- */
-#ifdef CONFIG_X86_32
-static const struct {
-	u16 poplmovl;
-	u32 val;
-	u16 int80;
-} __attribute__((packed)) retcode = {
-	0xb858,		/* popl %eax; movl $..., %eax */
-	__NR_sigreturn,
-	0x80cd,		/* int $0x80 */
-};
-
-static const struct {
-	u8  movl;
-	u32 val;
-	u16 int80;
-	u8  pad;
-} __attribute__((packed)) rt_retcode = {
-	0xb8,		/* movl $..., %eax */
-	__NR_rt_sigreturn,
-	0x80cd,		/* int $0x80 */
-	0
-};
-
-/*
- * Determine which stack to use..
- */
-static inline void __user *
-get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
-	     void **fpstate)
-{
-	unsigned long sp;
-
-	/* Default to using normal stack */
-	sp = regs->sp;
-
-	/*
-	 * If we are on the alternate signal stack and would overflow it, don't.
-	 * Return an always-bogus address instead so we will die with SIGSEGV.
-	 */
-	if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size)))
-		return (void __user *) -1L;
-
-	/* This is the X/Open sanctioned signal stack switching.  */
-	if (ka->sa.sa_flags & SA_ONSTACK) {
-		if (sas_ss_flags(sp) == 0)
-			sp = current->sas_ss_sp + current->sas_ss_size;
-	} else {
-		/* This is the legacy signal stack switching. */
-		if ((regs->ss & 0xffff) != __USER_DS &&
-			!(ka->sa.sa_flags & SA_RESTORER) &&
-				ka->sa.sa_restorer)
-			sp = (unsigned long) ka->sa.sa_restorer;
-	}
-
-	if (used_math()) {
-		sp = sp - sig_xstate_size;
-		*fpstate = (struct _fpstate *) sp;
-		if (save_i387_xstate(*fpstate) < 0)
-			return (void __user *)-1L;
-	}
-
-	sp -= frame_size;
-	/*
-	 * Align the stack pointer according to the i386 ABI,
-	 * i.e. so that on function entry ((sp + 4) & 15) == 0.
-	 */
-	sp = ((sp + 4) & -16ul) - 4;
-
-	return (void __user *) sp;
-}
-
-static int
-__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
-	      struct pt_regs *regs)
-{
-	struct sigframe __user *frame;
-	void __user *restorer;
-	int err = 0;
-	void __user *fpstate = NULL;
-
-	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		return -EFAULT;
-
-	if (__put_user(sig, &frame->sig))
-		return -EFAULT;
-
-	if (setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
-		return -EFAULT;
-
-	if (_NSIG_WORDS > 1) {
-		if (__copy_to_user(&frame->extramask, &set->sig[1],
-				   sizeof(frame->extramask)))
-			return -EFAULT;
-	}
-
-	if (current->mm->context.vdso)
-		restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn);
-	else
-		restorer = &frame->retcode;
-	if (ka->sa.sa_flags & SA_RESTORER)
-		restorer = ka->sa.sa_restorer;
-
-	/* Set up to return from userspace.  */
-	err |= __put_user(restorer, &frame->pretcode);
-
-	/*
-	 * This is popl %eax ; movl $__NR_sigreturn, %eax ; int $0x80
-	 *
-	 * WE DO NOT USE IT ANY MORE! It's only left here for historical
-	 * reasons and because gdb uses it as a signature to notice
-	 * signal handler stack frames.
-	 */
-	err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode);
-
-	if (err)
-		return -EFAULT;
-
-	/* Set up registers for signal handler */
-	regs->sp = (unsigned long)frame;
-	regs->ip = (unsigned long)ka->sa.sa_handler;
-	regs->ax = (unsigned long)sig;
-	regs->dx = 0;
-	regs->cx = 0;
-
-	regs->ds = __USER_DS;
-	regs->es = __USER_DS;
-	regs->ss = __USER_DS;
-	regs->cs = __USER_CS;
-
-	return 0;
-}
-
-static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-			    sigset_t *set, struct pt_regs *regs)
-{
-	struct rt_sigframe __user *frame;
-	void __user *restorer;
-	int err = 0;
-	void __user *fpstate = NULL;
-
-	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		return -EFAULT;
-
-	err |= __put_user(sig, &frame->sig);
-	err |= __put_user(&frame->info, &frame->pinfo);
-	err |= __put_user(&frame->uc, &frame->puc);
-	err |= copy_siginfo_to_user(&frame->info, info);
-	if (err)
-		return -EFAULT;
-
-	/* Create the ucontext.  */
-	if (cpu_has_xsave)
-		err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
-	else
-		err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __put_user(0, &frame->uc.uc_link);
-	err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-	err |= __put_user(sas_ss_flags(regs->sp),
-			  &frame->uc.uc_stack.ss_flags);
-	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
-				regs, set->sig[0]);
-	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-	if (err)
-		return -EFAULT;
-
-	/* Set up to return from userspace.  */
-	restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
-	if (ka->sa.sa_flags & SA_RESTORER)
-		restorer = ka->sa.sa_restorer;
-	err |= __put_user(restorer, &frame->pretcode);
-
-	/*
-	 * This is movl $__NR_rt_sigreturn, %ax ; int $0x80
-	 *
-	 * WE DO NOT USE IT ANY MORE! It's only left here for historical
-	 * reasons and because gdb uses it as a signature to notice
-	 * signal handler stack frames.
-	 */
-	err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
-
-	if (err)
-		return -EFAULT;
-
-	/* Set up registers for signal handler */
-	regs->sp = (unsigned long)frame;
-	regs->ip = (unsigned long)ka->sa.sa_handler;
-	regs->ax = (unsigned long)sig;
-	regs->dx = (unsigned long)&frame->info;
-	regs->cx = (unsigned long)&frame->uc;
-
-	regs->ds = __USER_DS;
-	regs->es = __USER_DS;
-	regs->ss = __USER_DS;
-	regs->cs = __USER_CS;
-
-	return 0;
-}
-#else /* !CONFIG_X86_32 */
-/*
- * Determine which stack to use..
- */
-static void __user *
-get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size)
-{
-	/* Default to using normal stack - redzone*/
-	sp -= 128;
-
-	/* This is the X/Open sanctioned signal stack switching.  */
-	if (ka->sa.sa_flags & SA_ONSTACK) {
-		if (sas_ss_flags(sp) == 0)
-			sp = current->sas_ss_sp + current->sas_ss_size;
-	}
-
-	return (void __user *)round_down(sp - size, 64);
-}
-
-static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-			    sigset_t *set, struct pt_regs *regs)
-{
-	struct rt_sigframe __user *frame;
-	void __user *fp = NULL;
-	int err = 0;
-	struct task_struct *me = current;
-
-	if (used_math()) {
-		fp = get_stack(ka, regs->sp, sig_xstate_size);
-		frame = (void __user *)round_down(
-			(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
-
-		if (save_i387_xstate(fp) < 0)
-			return -EFAULT;
-	} else
-		frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8;
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		return -EFAULT;
-
-	if (ka->sa.sa_flags & SA_SIGINFO) {
-		if (copy_siginfo_to_user(&frame->info, info))
-			return -EFAULT;
-	}
-
-	/* Create the ucontext.  */
-	if (cpu_has_xsave)
-		err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
-	else
-		err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __put_user(0, &frame->uc.uc_link);
-	err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-	err |= __put_user(sas_ss_flags(regs->sp),
-			  &frame->uc.uc_stack.ss_flags);
-	err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
-	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-
-	/* Set up to return from userspace.  If provided, use a stub
-	   already in userspace.  */
-	/* x86-64 should always use SA_RESTORER. */
-	if (ka->sa.sa_flags & SA_RESTORER) {
-		err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
-	} else {
-		/* could use a vstub here */
-		return -EFAULT;
-	}
-
-	if (err)
-		return -EFAULT;
-
-	/* Set up registers for signal handler */
-	regs->di = sig;
-	/* In case the signal handler was declared without prototypes */
-	regs->ax = 0;
-
-	/* This also works for non SA_SIGINFO handlers because they expect the
-	   next argument after the signal number on the stack. */
-	regs->si = (unsigned long)&frame->info;
-	regs->dx = (unsigned long)&frame->uc;
-	regs->ip = (unsigned long) ka->sa.sa_handler;
-
-	regs->sp = (unsigned long)frame;
-
-	/* Set up the CS register to run signal handlers in 64-bit mode,
-	   even if the handler happens to be interrupting 32-bit code. */
-	regs->cs = __USER_CS;
-
-	return 0;
-}
-#endif /* CONFIG_X86_32 */
-
-#ifdef CONFIG_X86_32
-/*
- * Atomically swap in the new signal mask, and wait for a signal.
- */
-asmlinkage int
-sys_sigsuspend(int history0, int history1, old_sigset_t mask)
-{
-	mask &= _BLOCKABLE;
-	spin_lock_irq(&current->sighand->siglock);
-	current->saved_sigmask = current->blocked;
-	siginitset(&current->blocked, mask);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	current->state = TASK_INTERRUPTIBLE;
-	schedule();
-	set_restore_sigmask();
-
-	return -ERESTARTNOHAND;
-}
-
-asmlinkage int
-sys_sigaction(int sig, const struct old_sigaction __user *act,
-	      struct old_sigaction __user *oact)
-{
-	struct k_sigaction new_ka, old_ka;
-	int ret;
-
-	if (act) {
-		old_sigset_t mask;
-
-		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
-		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
-		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
-			return -EFAULT;
-
-		__get_user(new_ka.sa.sa_flags, &act->sa_flags);
-		__get_user(mask, &act->sa_mask);
-		siginitset(&new_ka.sa.sa_mask, mask);
-	}
-
-	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
-	if (!ret && oact) {
-		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
-		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
-		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
-			return -EFAULT;
-
-		__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
-		__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
-	}
-
-	return ret;
-}
-#endif /* CONFIG_X86_32 */
-
-#ifdef CONFIG_X86_32
-asmlinkage int sys_sigaltstack(unsigned long bx)
-{
-	/*
-	 * This is needed to make gcc realize it doesn't own the
-	 * "struct pt_regs"
-	 */
-	struct pt_regs *regs = (struct pt_regs *)&bx;
-	const stack_t __user *uss = (const stack_t __user *)bx;
-	stack_t __user *uoss = (stack_t __user *)regs->cx;
-
-	return do_sigaltstack(uss, uoss, regs->sp);
-}
-#else /* !CONFIG_X86_32 */
-asmlinkage long
-sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
-		struct pt_regs *regs)
-{
-	return do_sigaltstack(uss, uoss, regs->sp);
-}
-#endif /* CONFIG_X86_32 */
-
-/*
- * Do a signal return; undo the signal stack.
- */
-#ifdef CONFIG_X86_32
-asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
-{
-	struct sigframe __user *frame;
-	struct pt_regs *regs;
-	unsigned long ax;
-	sigset_t set;
-
-	regs = (struct pt_regs *) &__unused;
-	frame = (struct sigframe __user *)(regs->sp - 8);
-
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-	if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1
-		&& __copy_from_user(&set.sig[1], &frame->extramask,
-				    sizeof(frame->extramask))))
-		goto badframe;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	if (restore_sigcontext(regs, &frame->sc, &ax))
-		goto badframe;
-	return ax;
-
-badframe:
-	if (show_unhandled_signals && printk_ratelimit()) {
-		printk("%s%s[%d] bad frame in sigreturn frame:"
-			"%p ip:%lx sp:%lx oeax:%lx",
-		    task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
-		    current->comm, task_pid_nr(current), frame, regs->ip,
-		    regs->sp, regs->orig_ax);
-		print_vma_addr(" in ", regs->ip);
-		printk(KERN_CONT "\n");
-	}
-
-	force_sig(SIGSEGV, current);
-
-	return 0;
-}
-#endif /* CONFIG_X86_32 */
-
-static long do_rt_sigreturn(struct pt_regs *regs)
-{
-	struct rt_sigframe __user *frame;
-	unsigned long ax;
-	sigset_t set;
-
-	frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
-		goto badframe;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
-		goto badframe;
-
-	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
-		goto badframe;
-
-	return ax;
-
-badframe:
-	signal_fault(regs, frame, "rt_sigreturn");
-	return 0;
-}
-
-#ifdef CONFIG_X86_32
-asmlinkage int sys_rt_sigreturn(unsigned long __unused)
-{
-	struct pt_regs *regs = (struct pt_regs *)&__unused;
-
-	return do_rt_sigreturn(regs);
-}
-#else /* !CONFIG_X86_32 */
-asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
-{
-	return do_rt_sigreturn(regs);
-}
-#endif /* CONFIG_X86_32 */
-
-/*
- * OK, we're invoking a handler:
- */
-static int signr_convert(int sig)
-{
-#ifdef CONFIG_X86_32
-	struct thread_info *info = current_thread_info();
-
-	if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32)
-		return info->exec_domain->signal_invmap[sig];
-#endif /* CONFIG_X86_32 */
-	return sig;
-}
-
-#ifdef CONFIG_X86_32
-
-#define is_ia32	1
-#define ia32_setup_frame	__setup_frame
-#define ia32_setup_rt_frame	__setup_rt_frame
-
-#else /* !CONFIG_X86_32 */
-
-#ifdef CONFIG_IA32_EMULATION
-#define is_ia32	test_thread_flag(TIF_IA32)
-#else /* !CONFIG_IA32_EMULATION */
-#define is_ia32	0
-#endif /* CONFIG_IA32_EMULATION */
-
-#endif /* CONFIG_X86_32 */
-
-static int
-setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-	       sigset_t *set, struct pt_regs *regs)
-{
-	int usig = signr_convert(sig);
-	int ret;
-
-	/* Set up the stack frame */
-	if (is_ia32) {
-		if (ka->sa.sa_flags & SA_SIGINFO)
-			ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
-		else
-			ret = ia32_setup_frame(usig, ka, set, regs);
-	} else
-		ret = __setup_rt_frame(sig, ka, info, set, regs);
-
-	if (ret) {
-		force_sigsegv(sig, current);
-		return -EFAULT;
-	}
-
-	return ret;
-}
-
-static int
-handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
-	      sigset_t *oldset, struct pt_regs *regs)
-{
-	int ret;
-
-	/* Are we from a system call? */
-	if (syscall_get_nr(current, regs) >= 0) {
-		/* If so, check system call restarting.. */
-		switch (syscall_get_error(current, regs)) {
-		case -ERESTART_RESTARTBLOCK:
-		case -ERESTARTNOHAND:
-			regs->ax = -EINTR;
-			break;
-
-		case -ERESTARTSYS:
-			if (!(ka->sa.sa_flags & SA_RESTART)) {
-				regs->ax = -EINTR;
-				break;
-			}
-		/* fallthrough */
-		case -ERESTARTNOINTR:
-			regs->ax = regs->orig_ax;
-			regs->ip -= 2;
-			break;
-		}
-	}
-
-	/*
-	 * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF
-	 * flag so that register information in the sigcontext is correct.
-	 */
-	if (unlikely(regs->flags & X86_EFLAGS_TF) &&
-	    likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
-		regs->flags &= ~X86_EFLAGS_TF;
-
-	ret = setup_rt_frame(sig, ka, info, oldset, regs);
-
-	if (ret)
-		return ret;
-
-#ifdef CONFIG_X86_64
-	/*
-	 * This has nothing to do with segment registers,
-	 * despite the name.  This magic affects uaccess.h
-	 * macros' behavior.  Reset it to the normal setting.
-	 */
-	set_fs(USER_DS);
-#endif
-
-	/*
-	 * Clear the direction flag as per the ABI for function entry.
-	 */
-	regs->flags &= ~X86_EFLAGS_DF;
-
-	/*
-	 * Clear TF when entering the signal handler, but
-	 * notify any tracer that was single-stepping it.
-	 * The tracer may want to single-step inside the
-	 * handler too.
-	 */
-	regs->flags &= ~X86_EFLAGS_TF;
-
-	spin_lock_irq(&current->sighand->siglock);
-	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
-	if (!(ka->sa.sa_flags & SA_NODEFER))
-		sigaddset(&current->blocked, sig);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	tracehook_signal_handler(sig, info, ka, regs,
-				 test_thread_flag(TIF_SINGLESTEP));
-
-	return 0;
-}
-
-#ifdef CONFIG_X86_32
-#define NR_restart_syscall	__NR_restart_syscall
-#else /* !CONFIG_X86_32 */
-#define NR_restart_syscall	\
-	test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
-#endif /* CONFIG_X86_32 */
-
-/*
- * Note that 'init' is a special process: it doesn't get signals it doesn't
- * want to handle. Thus you cannot kill init even with a SIGKILL even by
- * mistake.
- */
-static void do_signal(struct pt_regs *regs)
-{
-	struct k_sigaction ka;
-	siginfo_t info;
-	int signr;
-	sigset_t *oldset;
-
-	/*
-	 * We want the common case to go fast, which is why we may in certain
-	 * cases get here from kernel mode. Just return without doing anything
-	 * if so.
-	 * X86_32: vm86 regs switched out by assembly code before reaching
-	 * here, so testing against kernel CS suffices.
-	 */
-	if (!user_mode(regs))
-		return;
-
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
-	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
-	if (signr > 0) {
-		/*
-		 * Re-enable any watchpoints before delivering the
-		 * signal to user space. The processor register will
-		 * have been cleared if the watchpoint triggered
-		 * inside the kernel.
-		 */
-		if (current->thread.debugreg7)
-			set_debugreg(current->thread.debugreg7, 7);
-
-		/* Whee! Actually deliver the signal.  */
-		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
-			/*
-			 * A signal was successfully delivered; the saved
-			 * sigmask will have been stored in the signal frame,
-			 * and will be restored by sigreturn, so we can simply
-			 * clear the TS_RESTORE_SIGMASK flag.
-			 */
-			current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-		}
-		return;
-	}
-
-	/* Did we come from a system call? */
-	if (syscall_get_nr(current, regs) >= 0) {
-		/* Restart the system call - no handlers present */
-		switch (syscall_get_error(current, regs)) {
-		case -ERESTARTNOHAND:
-		case -ERESTARTSYS:
-		case -ERESTARTNOINTR:
-			regs->ax = regs->orig_ax;
-			regs->ip -= 2;
-			break;
-
-		case -ERESTART_RESTARTBLOCK:
-			regs->ax = NR_restart_syscall;
-			regs->ip -= 2;
-			break;
-		}
-	}
-
-	/*
-	 * If there's no signal to deliver, we just put the saved sigmask
-	 * back.
-	 */
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
-		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
-}
-
-/*
- * notification of userspace execution resumption
- * - triggered by the TIF_WORK_MASK flags
- */
-void
-do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
-{
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
-	/* notify userspace of pending MCEs */
-	if (thread_info_flags & _TIF_MCE_NOTIFY)
-		mce_notify_user();
-#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
-
-	/* deal with pending signal delivery */
-	if (thread_info_flags & _TIF_SIGPENDING)
-		do_signal(regs);
-
-	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
-		clear_thread_flag(TIF_NOTIFY_RESUME);
-		tracehook_notify_resume(regs);
-	}
-
-#ifdef CONFIG_X86_32
-	clear_thread_flag(TIF_IRET);
-#endif /* CONFIG_X86_32 */
-}
-
-void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
-{
-	struct task_struct *me = current;
-
-	if (show_unhandled_signals && printk_ratelimit()) {
-		printk(KERN_INFO
-		       "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
-		       me->comm, me->pid, where, frame,
-		       regs->ip, regs->sp, regs->orig_ax);
-		print_vma_addr(" in ", regs->ip);
-		printk(KERN_CONT "\n");
-	}
-
-	force_sig(SIGSEGV, me);
-}
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
deleted file mode 100644
index b1f4d34e0a38..000000000000
--- a/arch/x86/kernel/signal_64.c
+++ /dev/null
@@ -1,915 +0,0 @@
-/*
- *  Copyright (C) 1991, 1992  Linus Torvalds
- *  Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
- *
- *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
- *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
- *  2000-2002   x86-64 support by Andi Kleen
- */
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/errno.h>
-#include <linux/wait.h>
-#include <linux/ptrace.h>
-#include <linux/tracehook.h>
-#include <linux/unistd.h>
-#include <linux/stddef.h>
-#include <linux/personality.h>
-#include <linux/uaccess.h>
-
-#include <asm/processor.h>
-#include <asm/ucontext.h>
-#include <asm/i387.h>
-#include <asm/vdso.h>
-
-#ifdef CONFIG_X86_64
-#include <asm/proto.h>
-#include <asm/ia32_unistd.h>
-#include <asm/mce.h>
-#endif /* CONFIG_X86_64 */
-
-#include <asm/syscall.h>
-#include <asm/syscalls.h>
-
-#include "sigframe.h"
-
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
-#define __FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_OF | \
-			 X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
-			 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
-			 X86_EFLAGS_CF)
-
-#ifdef CONFIG_X86_32
-# define FIX_EFLAGS	(__FIX_EFLAGS | X86_EFLAGS_RF)
-#else
-# define FIX_EFLAGS	__FIX_EFLAGS
-#endif
-
-#define COPY(x)			{		\
-	err |= __get_user(regs->x, &sc->x);	\
-}
-
-#define COPY_SEG(seg)		{			\
-		unsigned short tmp;			\
-		err |= __get_user(tmp, &sc->seg);	\
-		regs->seg = tmp;			\
-}
-
-#define COPY_SEG_CPL3(seg)	{			\
-		unsigned short tmp;			\
-		err |= __get_user(tmp, &sc->seg);	\
-		regs->seg = tmp | 3;			\
-}
-
-#define GET_SEG(seg)		{			\
-		unsigned short tmp;			\
-		err |= __get_user(tmp, &sc->seg);	\
-		loadsegment(seg, tmp);			\
-}
-
-static int
-restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
-		   unsigned long *pax)
-{
-	void __user *buf;
-	unsigned int tmpflags;
-	unsigned int err = 0;
-
-	/* Always make any pending restarted system calls return -EINTR */
-	current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
-#ifdef CONFIG_X86_32
-	GET_SEG(gs);
-	COPY_SEG(fs);
-	COPY_SEG(es);
-	COPY_SEG(ds);
-#endif /* CONFIG_X86_32 */
-
-	COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
-	COPY(dx); COPY(cx); COPY(ip);
-
-#ifdef CONFIG_X86_64
-	COPY(r8);
-	COPY(r9);
-	COPY(r10);
-	COPY(r11);
-	COPY(r12);
-	COPY(r13);
-	COPY(r14);
-	COPY(r15);
-#endif /* CONFIG_X86_64 */
-
-#ifdef CONFIG_X86_32
-	COPY_SEG_CPL3(cs);
-	COPY_SEG_CPL3(ss);
-#else /* !CONFIG_X86_32 */
-	/* Kernel saves and restores only the CS segment register on signals,
-	 * which is the bare minimum needed to allow mixed 32/64-bit code.
-	 * App's signal handler can save/restore other segments if needed. */
-	COPY_SEG_CPL3(cs);
-#endif /* CONFIG_X86_32 */
-
-	err |= __get_user(tmpflags, &sc->flags);
-	regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
-	regs->orig_ax = -1;		/* disable syscall checks */
-
-	err |= __get_user(buf, &sc->fpstate);
-	err |= restore_i387_xstate(buf);
-
-	err |= __get_user(*pax, &sc->ax);
-	return err;
-}
-
-static int
-setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
-		 struct pt_regs *regs, unsigned long mask)
-{
-	int err = 0;
-
-#ifdef CONFIG_X86_32
-	{
-		unsigned int tmp;
-
-		savesegment(gs, tmp);
-		err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
-	}
-	err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
-	err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
-	err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
-#endif /* CONFIG_X86_32 */
-
-	err |= __put_user(regs->di, &sc->di);
-	err |= __put_user(regs->si, &sc->si);
-	err |= __put_user(regs->bp, &sc->bp);
-	err |= __put_user(regs->sp, &sc->sp);
-	err |= __put_user(regs->bx, &sc->bx);
-	err |= __put_user(regs->dx, &sc->dx);
-	err |= __put_user(regs->cx, &sc->cx);
-	err |= __put_user(regs->ax, &sc->ax);
-#ifdef CONFIG_X86_64
-	err |= __put_user(regs->r8, &sc->r8);
-	err |= __put_user(regs->r9, &sc->r9);
-	err |= __put_user(regs->r10, &sc->r10);
-	err |= __put_user(regs->r11, &sc->r11);
-	err |= __put_user(regs->r12, &sc->r12);
-	err |= __put_user(regs->r13, &sc->r13);
-	err |= __put_user(regs->r14, &sc->r14);
-	err |= __put_user(regs->r15, &sc->r15);
-#endif /* CONFIG_X86_64 */
-
-	err |= __put_user(current->thread.trap_no, &sc->trapno);
-	err |= __put_user(current->thread.error_code, &sc->err);
-	err |= __put_user(regs->ip, &sc->ip);
-#ifdef CONFIG_X86_32
-	err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
-	err |= __put_user(regs->flags, &sc->flags);
-	err |= __put_user(regs->sp, &sc->sp_at_signal);
-	err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
-#else /* !CONFIG_X86_32 */
-	err |= __put_user(regs->flags, &sc->flags);
-	err |= __put_user(regs->cs, &sc->cs);
-	err |= __put_user(0, &sc->gs);
-	err |= __put_user(0, &sc->fs);
-#endif /* CONFIG_X86_32 */
-
-	err |= __put_user(fpstate, &sc->fpstate);
-
-	/* non-iBCS2 extensions.. */
-	err |= __put_user(mask, &sc->oldmask);
-	err |= __put_user(current->thread.cr2, &sc->cr2);
-
-	return err;
-}
-
-/*
- * Set up a signal frame.
- */
-#ifdef CONFIG_X86_32
-static const struct {
-	u16 poplmovl;
-	u32 val;
-	u16 int80;
-} __attribute__((packed)) retcode = {
-	0xb858,		/* popl %eax; movl $..., %eax */
-	__NR_sigreturn,
-	0x80cd,		/* int $0x80 */
-};
-
-static const struct {
-	u8  movl;
-	u32 val;
-	u16 int80;
-	u8  pad;
-} __attribute__((packed)) rt_retcode = {
-	0xb8,		/* movl $..., %eax */
-	__NR_rt_sigreturn,
-	0x80cd,		/* int $0x80 */
-	0
-};
-
-/*
- * Determine which stack to use..
- */
-static inline void __user *
-get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
-	     void **fpstate)
-{
-	unsigned long sp;
-
-	/* Default to using normal stack */
-	sp = regs->sp;
-
-	/*
-	 * If we are on the alternate signal stack and would overflow it, don't.
-	 * Return an always-bogus address instead so we will die with SIGSEGV.
-	 */
-	if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size)))
-		return (void __user *) -1L;
-
-	/* This is the X/Open sanctioned signal stack switching.  */
-	if (ka->sa.sa_flags & SA_ONSTACK) {
-		if (sas_ss_flags(sp) == 0)
-			sp = current->sas_ss_sp + current->sas_ss_size;
-	} else {
-		/* This is the legacy signal stack switching. */
-		if ((regs->ss & 0xffff) != __USER_DS &&
-			!(ka->sa.sa_flags & SA_RESTORER) &&
-				ka->sa.sa_restorer)
-			sp = (unsigned long) ka->sa.sa_restorer;
-	}
-
-	if (used_math()) {
-		sp = sp - sig_xstate_size;
-		*fpstate = (struct _fpstate *) sp;
-		if (save_i387_xstate(*fpstate) < 0)
-			return (void __user *)-1L;
-	}
-
-	sp -= frame_size;
-	/*
-	 * Align the stack pointer according to the i386 ABI,
-	 * i.e. so that on function entry ((sp + 4) & 15) == 0.
-	 */
-	sp = ((sp + 4) & -16ul) - 4;
-
-	return (void __user *) sp;
-}
-
-static int
-__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
-	      struct pt_regs *regs)
-{
-	struct sigframe __user *frame;
-	void __user *restorer;
-	int err = 0;
-	void __user *fpstate = NULL;
-
-	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		return -EFAULT;
-
-	if (__put_user(sig, &frame->sig))
-		return -EFAULT;
-
-	if (setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
-		return -EFAULT;
-
-	if (_NSIG_WORDS > 1) {
-		if (__copy_to_user(&frame->extramask, &set->sig[1],
-				   sizeof(frame->extramask)))
-			return -EFAULT;
-	}
-
-	if (current->mm->context.vdso)
-		restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn);
-	else
-		restorer = &frame->retcode;
-	if (ka->sa.sa_flags & SA_RESTORER)
-		restorer = ka->sa.sa_restorer;
-
-	/* Set up to return from userspace.  */
-	err |= __put_user(restorer, &frame->pretcode);
-
-	/*
-	 * This is popl %eax ; movl $__NR_sigreturn, %eax ; int $0x80
-	 *
-	 * WE DO NOT USE IT ANY MORE! It's only left here for historical
-	 * reasons and because gdb uses it as a signature to notice
-	 * signal handler stack frames.
-	 */
-	err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode);
-
-	if (err)
-		return -EFAULT;
-
-	/* Set up registers for signal handler */
-	regs->sp = (unsigned long)frame;
-	regs->ip = (unsigned long)ka->sa.sa_handler;
-	regs->ax = (unsigned long)sig;
-	regs->dx = 0;
-	regs->cx = 0;
-
-	regs->ds = __USER_DS;
-	regs->es = __USER_DS;
-	regs->ss = __USER_DS;
-	regs->cs = __USER_CS;
-
-	return 0;
-}
-
-static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-			    sigset_t *set, struct pt_regs *regs)
-{
-	struct rt_sigframe __user *frame;
-	void __user *restorer;
-	int err = 0;
-	void __user *fpstate = NULL;
-
-	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		return -EFAULT;
-
-	err |= __put_user(sig, &frame->sig);
-	err |= __put_user(&frame->info, &frame->pinfo);
-	err |= __put_user(&frame->uc, &frame->puc);
-	err |= copy_siginfo_to_user(&frame->info, info);
-	if (err)
-		return -EFAULT;
-
-	/* Create the ucontext.  */
-	if (cpu_has_xsave)
-		err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
-	else
-		err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __put_user(0, &frame->uc.uc_link);
-	err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-	err |= __put_user(sas_ss_flags(regs->sp),
-			  &frame->uc.uc_stack.ss_flags);
-	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
-				regs, set->sig[0]);
-	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-	if (err)
-		return -EFAULT;
-
-	/* Set up to return from userspace.  */
-	restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
-	if (ka->sa.sa_flags & SA_RESTORER)
-		restorer = ka->sa.sa_restorer;
-	err |= __put_user(restorer, &frame->pretcode);
-
-	/*
-	 * This is movl $__NR_rt_sigreturn, %ax ; int $0x80
-	 *
-	 * WE DO NOT USE IT ANY MORE! It's only left here for historical
-	 * reasons and because gdb uses it as a signature to notice
-	 * signal handler stack frames.
-	 */
-	err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
-
-	if (err)
-		return -EFAULT;
-
-	/* Set up registers for signal handler */
-	regs->sp = (unsigned long)frame;
-	regs->ip = (unsigned long)ka->sa.sa_handler;
-	regs->ax = (unsigned long)sig;
-	regs->dx = (unsigned long)&frame->info;
-	regs->cx = (unsigned long)&frame->uc;
-
-	regs->ds = __USER_DS;
-	regs->es = __USER_DS;
-	regs->ss = __USER_DS;
-	regs->cs = __USER_CS;
-
-	return 0;
-}
-#else /* !CONFIG_X86_32 */
-/*
- * Determine which stack to use..
- */
-static void __user *
-get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size)
-{
-	/* Default to using normal stack - redzone*/
-	sp -= 128;
-
-	/* This is the X/Open sanctioned signal stack switching.  */
-	if (ka->sa.sa_flags & SA_ONSTACK) {
-		if (sas_ss_flags(sp) == 0)
-			sp = current->sas_ss_sp + current->sas_ss_size;
-	}
-
-	return (void __user *)round_down(sp - size, 64);
-}
-
-static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-			    sigset_t *set, struct pt_regs *regs)
-{
-	struct rt_sigframe __user *frame;
-	void __user *fp = NULL;
-	int err = 0;
-	struct task_struct *me = current;
-
-	if (used_math()) {
-		fp = get_stack(ka, regs->sp, sig_xstate_size);
-		frame = (void __user *)round_down(
-			(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
-
-		if (save_i387_xstate(fp) < 0)
-			return -EFAULT;
-	} else
-		frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8;
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		return -EFAULT;
-
-	if (ka->sa.sa_flags & SA_SIGINFO) {
-		if (copy_siginfo_to_user(&frame->info, info))
-			return -EFAULT;
-	}
-
-	/* Create the ucontext.  */
-	if (cpu_has_xsave)
-		err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
-	else
-		err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __put_user(0, &frame->uc.uc_link);
-	err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-	err |= __put_user(sas_ss_flags(regs->sp),
-			  &frame->uc.uc_stack.ss_flags);
-	err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
-	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-
-	/* Set up to return from userspace.  If provided, use a stub
-	   already in userspace.  */
-	/* x86-64 should always use SA_RESTORER. */
-	if (ka->sa.sa_flags & SA_RESTORER) {
-		err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
-	} else {
-		/* could use a vstub here */
-		return -EFAULT;
-	}
-
-	if (err)
-		return -EFAULT;
-
-	/* Set up registers for signal handler */
-	regs->di = sig;
-	/* In case the signal handler was declared without prototypes */
-	regs->ax = 0;
-
-	/* This also works for non SA_SIGINFO handlers because they expect the
-	   next argument after the signal number on the stack. */
-	regs->si = (unsigned long)&frame->info;
-	regs->dx = (unsigned long)&frame->uc;
-	regs->ip = (unsigned long) ka->sa.sa_handler;
-
-	regs->sp = (unsigned long)frame;
-
-	/* Set up the CS register to run signal handlers in 64-bit mode,
-	   even if the handler happens to be interrupting 32-bit code. */
-	regs->cs = __USER_CS;
-
-	return 0;
-}
-#endif /* CONFIG_X86_32 */
-
-#ifdef CONFIG_X86_32
-/*
- * Atomically swap in the new signal mask, and wait for a signal.
- */
-asmlinkage int
-sys_sigsuspend(int history0, int history1, old_sigset_t mask)
-{
-	mask &= _BLOCKABLE;
-	spin_lock_irq(&current->sighand->siglock);
-	current->saved_sigmask = current->blocked;
-	siginitset(&current->blocked, mask);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	current->state = TASK_INTERRUPTIBLE;
-	schedule();
-	set_restore_sigmask();
-
-	return -ERESTARTNOHAND;
-}
-
-asmlinkage int
-sys_sigaction(int sig, const struct old_sigaction __user *act,
-	      struct old_sigaction __user *oact)
-{
-	struct k_sigaction new_ka, old_ka;
-	int ret;
-
-	if (act) {
-		old_sigset_t mask;
-
-		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
-		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
-		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
-			return -EFAULT;
-
-		__get_user(new_ka.sa.sa_flags, &act->sa_flags);
-		__get_user(mask, &act->sa_mask);
-		siginitset(&new_ka.sa.sa_mask, mask);
-	}
-
-	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
-	if (!ret && oact) {
-		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
-		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
-		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
-			return -EFAULT;
-
-		__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
-		__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
-	}
-
-	return ret;
-}
-#endif /* CONFIG_X86_32 */
-
-#ifdef CONFIG_X86_32
-asmlinkage int sys_sigaltstack(unsigned long bx)
-{
-	/*
-	 * This is needed to make gcc realize it doesn't own the
-	 * "struct pt_regs"
-	 */
-	struct pt_regs *regs = (struct pt_regs *)&bx;
-	const stack_t __user *uss = (const stack_t __user *)bx;
-	stack_t __user *uoss = (stack_t __user *)regs->cx;
-
-	return do_sigaltstack(uss, uoss, regs->sp);
-}
-#else /* !CONFIG_X86_32 */
-asmlinkage long
-sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
-		struct pt_regs *regs)
-{
-	return do_sigaltstack(uss, uoss, regs->sp);
-}
-#endif /* CONFIG_X86_32 */
-
-/*
- * Do a signal return; undo the signal stack.
- */
-#ifdef CONFIG_X86_32
-asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
-{
-	struct sigframe __user *frame;
-	struct pt_regs *regs;
-	unsigned long ax;
-	sigset_t set;
-
-	regs = (struct pt_regs *) &__unused;
-	frame = (struct sigframe __user *)(regs->sp - 8);
-
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-	if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1
-		&& __copy_from_user(&set.sig[1], &frame->extramask,
-				    sizeof(frame->extramask))))
-		goto badframe;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	if (restore_sigcontext(regs, &frame->sc, &ax))
-		goto badframe;
-	return ax;
-
-badframe:
-	if (show_unhandled_signals && printk_ratelimit()) {
-		printk("%s%s[%d] bad frame in sigreturn frame:"
-			"%p ip:%lx sp:%lx oeax:%lx",
-		    task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
-		    current->comm, task_pid_nr(current), frame, regs->ip,
-		    regs->sp, regs->orig_ax);
-		print_vma_addr(" in ", regs->ip);
-		printk(KERN_CONT "\n");
-	}
-
-	force_sig(SIGSEGV, current);
-
-	return 0;
-}
-#endif /* CONFIG_X86_32 */
-
-static long do_rt_sigreturn(struct pt_regs *regs)
-{
-	struct rt_sigframe __user *frame;
-	unsigned long ax;
-	sigset_t set;
-
-	frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
-		goto badframe;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
-		goto badframe;
-
-	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
-		goto badframe;
-
-	return ax;
-
-badframe:
-	signal_fault(regs, frame, "rt_sigreturn");
-	return 0;
-}
-
-#ifdef CONFIG_X86_32
-asmlinkage int sys_rt_sigreturn(unsigned long __unused)
-{
-	struct pt_regs *regs = (struct pt_regs *)&__unused;
-
-	return do_rt_sigreturn(regs);
-}
-#else /* !CONFIG_X86_32 */
-asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
-{
-	return do_rt_sigreturn(regs);
-}
-#endif /* CONFIG_X86_32 */
-
-/*
- * OK, we're invoking a handler:
- */
-static int signr_convert(int sig)
-{
-#ifdef CONFIG_X86_32
-	struct thread_info *info = current_thread_info();
-
-	if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32)
-		return info->exec_domain->signal_invmap[sig];
-#endif /* CONFIG_X86_32 */
-	return sig;
-}
-
-#ifdef CONFIG_X86_32
-
-#define is_ia32	1
-#define ia32_setup_frame	__setup_frame
-#define ia32_setup_rt_frame	__setup_rt_frame
-
-#else /* !CONFIG_X86_32 */
-
-#ifdef CONFIG_IA32_EMULATION
-#define is_ia32	test_thread_flag(TIF_IA32)
-#else /* !CONFIG_IA32_EMULATION */
-#define is_ia32	0
-#endif /* CONFIG_IA32_EMULATION */
-
-#endif /* CONFIG_X86_32 */
-
-static int
-setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-	       sigset_t *set, struct pt_regs *regs)
-{
-	int usig = signr_convert(sig);
-	int ret;
-
-	/* Set up the stack frame */
-	if (is_ia32) {
-		if (ka->sa.sa_flags & SA_SIGINFO)
-			ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
-		else
-			ret = ia32_setup_frame(usig, ka, set, regs);
-	} else
-		ret = __setup_rt_frame(sig, ka, info, set, regs);
-
-	if (ret) {
-		force_sigsegv(sig, current);
-		return -EFAULT;
-	}
-
-	return ret;
-}
-
-static int
-handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
-	      sigset_t *oldset, struct pt_regs *regs)
-{
-	int ret;
-
-	/* Are we from a system call? */
-	if (syscall_get_nr(current, regs) >= 0) {
-		/* If so, check system call restarting.. */
-		switch (syscall_get_error(current, regs)) {
-		case -ERESTART_RESTARTBLOCK:
-		case -ERESTARTNOHAND:
-			regs->ax = -EINTR;
-			break;
-
-		case -ERESTARTSYS:
-			if (!(ka->sa.sa_flags & SA_RESTART)) {
-				regs->ax = -EINTR;
-				break;
-			}
-		/* fallthrough */
-		case -ERESTARTNOINTR:
-			regs->ax = regs->orig_ax;
-			regs->ip -= 2;
-			break;
-		}
-	}
-
-	/*
-	 * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF
-	 * flag so that register information in the sigcontext is correct.
-	 */
-	if (unlikely(regs->flags & X86_EFLAGS_TF) &&
-	    likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
-		regs->flags &= ~X86_EFLAGS_TF;
-
-	ret = setup_rt_frame(sig, ka, info, oldset, regs);
-
-	if (ret)
-		return ret;
-
-#ifdef CONFIG_X86_64
-	/*
-	 * This has nothing to do with segment registers,
-	 * despite the name.  This magic affects uaccess.h
-	 * macros' behavior.  Reset it to the normal setting.
-	 */
-	set_fs(USER_DS);
-#endif
-
-	/*
-	 * Clear the direction flag as per the ABI for function entry.
-	 */
-	regs->flags &= ~X86_EFLAGS_DF;
-
-	/*
-	 * Clear TF when entering the signal handler, but
-	 * notify any tracer that was single-stepping it.
-	 * The tracer may want to single-step inside the
-	 * handler too.
-	 */
-	regs->flags &= ~X86_EFLAGS_TF;
-
-	spin_lock_irq(&current->sighand->siglock);
-	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
-	if (!(ka->sa.sa_flags & SA_NODEFER))
-		sigaddset(&current->blocked, sig);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	tracehook_signal_handler(sig, info, ka, regs,
-				 test_thread_flag(TIF_SINGLESTEP));
-
-	return 0;
-}
-
-#ifdef CONFIG_X86_32
-#define NR_restart_syscall	__NR_restart_syscall
-#else /* !CONFIG_X86_32 */
-#define NR_restart_syscall	\
-	test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
-#endif /* CONFIG_X86_32 */
-
-/*
- * Note that 'init' is a special process: it doesn't get signals it doesn't
- * want to handle. Thus you cannot kill init even with a SIGKILL even by
- * mistake.
- */
-static void do_signal(struct pt_regs *regs)
-{
-	struct k_sigaction ka;
-	siginfo_t info;
-	int signr;
-	sigset_t *oldset;
-
-	/*
-	 * We want the common case to go fast, which is why we may in certain
-	 * cases get here from kernel mode. Just return without doing anything
-	 * if so.
-	 * X86_32: vm86 regs switched out by assembly code before reaching
-	 * here, so testing against kernel CS suffices.
-	 */
-	if (!user_mode(regs))
-		return;
-
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
-	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
-	if (signr > 0) {
-		/*
-		 * Re-enable any watchpoints before delivering the
-		 * signal to user space. The processor register will
-		 * have been cleared if the watchpoint triggered
-		 * inside the kernel.
-		 */
-		if (current->thread.debugreg7)
-			set_debugreg(current->thread.debugreg7, 7);
-
-		/* Whee! Actually deliver the signal.  */
-		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
-			/*
-			 * A signal was successfully delivered; the saved
-			 * sigmask will have been stored in the signal frame,
-			 * and will be restored by sigreturn, so we can simply
-			 * clear the TS_RESTORE_SIGMASK flag.
-			 */
-			current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-		}
-		return;
-	}
-
-	/* Did we come from a system call? */
-	if (syscall_get_nr(current, regs) >= 0) {
-		/* Restart the system call - no handlers present */
-		switch (syscall_get_error(current, regs)) {
-		case -ERESTARTNOHAND:
-		case -ERESTARTSYS:
-		case -ERESTARTNOINTR:
-			regs->ax = regs->orig_ax;
-			regs->ip -= 2;
-			break;
-
-		case -ERESTART_RESTARTBLOCK:
-			regs->ax = NR_restart_syscall;
-			regs->ip -= 2;
-			break;
-		}
-	}
-
-	/*
-	 * If there's no signal to deliver, we just put the saved sigmask
-	 * back.
-	 */
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
-		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
-}
-
-/*
- * notification of userspace execution resumption
- * - triggered by the TIF_WORK_MASK flags
- */
-void
-do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
-{
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
-	/* notify userspace of pending MCEs */
-	if (thread_info_flags & _TIF_MCE_NOTIFY)
-		mce_notify_user();
-#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
-
-	/* deal with pending signal delivery */
-	if (thread_info_flags & _TIF_SIGPENDING)
-		do_signal(regs);
-
-	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
-		clear_thread_flag(TIF_NOTIFY_RESUME);
-		tracehook_notify_resume(regs);
-	}
-
-#ifdef CONFIG_X86_32
-	clear_thread_flag(TIF_IRET);
-#endif /* CONFIG_X86_32 */
-}
-
-void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
-{
-	struct task_struct *me = current;
-
-	if (show_unhandled_signals && printk_ratelimit()) {
-		printk(KERN_INFO
-		       "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
-		       me->comm, me->pid, where, frame,
-		       regs->ip, regs->sp, regs->orig_ax);
-		print_vma_addr(" in ", regs->ip);
-		printk(KERN_CONT "\n");
-	}
-
-	force_sig(SIGSEGV, me);
-}
-- 
cgit v1.2.3-70-g09d2


From 4db646b1af8fdcf01d690d29eeae44cd937edb0d Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Sun, 23 Nov 2008 20:49:52 +0100
Subject: x86: microcode: fix sparse warnings

Impact: make global variables and a function static

Fix following sparse warnings:

  arch/x86/kernel/microcode_core.c:102:22: warning: symbol
  'microcode_ops' was not declared. Should it be static?
  arch/x86/kernel/microcode_core.c:206:24: warning: symbol
  'microcode_pdev' was not declared. Should it be static?
  arch/x86/kernel/microcode_core.c:322:6: warning: symbol
  'microcode_update_cpu' was not declared. Should it be static?
  arch/x86/kernel/microcode_intel.c:468:22: warning: symbol
  'microcode_intel_ops' was not declared. Should it be static?

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_core.c  | 6 +++---
 arch/x86/kernel/microcode_intel.c | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 82fb2809ce32..5b711a534495 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -99,7 +99,7 @@ MODULE_LICENSE("GPL");
 
 #define MICROCODE_VERSION 	"2.00"
 
-struct microcode_ops *microcode_ops;
+static struct microcode_ops *microcode_ops;
 
 /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
 static DEFINE_MUTEX(microcode_mutex);
@@ -203,7 +203,7 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
 #endif
 
 /* fake device for request_firmware */
-struct platform_device *microcode_pdev;
+static struct platform_device *microcode_pdev;
 
 static ssize_t reload_store(struct sys_device *dev,
 			    struct sysdev_attribute *attr,
@@ -319,7 +319,7 @@ static int microcode_resume_cpu(int cpu)
 	return 0;
 }
 
-void microcode_update_cpu(int cpu)
+static void microcode_update_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	int err = 0;
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 622dc4a21784..c34c820ee486 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -465,7 +465,7 @@ static void microcode_fini_cpu(int cpu)
 	uci->mc = NULL;
 }
 
-struct microcode_ops microcode_intel_ops = {
+static struct microcode_ops microcode_intel_ops = {
 	.request_microcode_user		  = request_microcode_user,
 	.request_microcode_fw             = request_microcode_fw,
 	.collect_cpu_info                 = collect_cpu_info,
-- 
cgit v1.2.3-70-g09d2


From ddeb8f2149de280d54f0c8910cead42e6042b2cb Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Mon, 24 Nov 2008 13:24:28 +0100
Subject: x86_64: get rid of the use of KPROBE_ENTRY / KPROBE_END

Impact: clean up assembly macros and annotations - with some object impact

entry_64.S is the only user of KPROBE_ENTRY / KPROBE_END on
x86_64. This patch reorders entry_64.S and explicitly generates
a separate section for functions that need the protection. The
generated code before and after the patch is equal.

Implicitly changing sections in assembly files makes it more
difficult to follow why the assembler is doing certain things.
For example,

.p2align 5
KPROBE_ENTRY(...)

was not doing what you would expect. Other section changes
(__ex_table, .fixup, .init.rodata) are done explicitly already.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Acked-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 444 ++++++++++++++++++++++-----------------------
 1 file changed, 220 insertions(+), 224 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index f2d546e16354..38fcd0517c31 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1002,7 +1002,7 @@ END(\sym)
 .endm
 
 .macro paranoidzeroentry sym do_sym
-KPROBE_ENTRY(\sym)
+ENTRY(\sym)
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	pushq $-1		/* ORIG_RAX: no syscall to restart */
@@ -1015,11 +1015,11 @@ KPROBE_ENTRY(\sym)
 	call \do_sym
 	jmp paranoid_exit	/* %ebx: no swapgs flag */
 	CFI_ENDPROC
-KPROBE_END(\sym)
+END(\sym)
 .endm
 
 .macro paranoidzeroentry_ist sym do_sym ist
-KPROBE_ENTRY(\sym)
+ENTRY(\sym)
  	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	pushq $-1		/* ORIG_RAX: no syscall to restart */
@@ -1035,15 +1035,11 @@ KPROBE_ENTRY(\sym)
 	addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
 	jmp paranoid_exit	/* %ebx: no swapgs flag */
 	CFI_ENDPROC
-KPROBE_END(\sym)
+END(\sym)
 .endm
 
-.macro errorentry sym do_sym entry=0
-.if \entry
-KPROBE_ENTRY(\sym)
-.else
+.macro errorentry sym do_sym
 ENTRY(\sym)
-.endif
 	XCPT_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	subq $15*8,%rsp
@@ -1056,20 +1052,12 @@ ENTRY(\sym)
 	call \do_sym
 	jmp error_exit			/* %ebx: no swapgs flag */
 	CFI_ENDPROC
-.if \entry
-KPROBE_END(\sym)
-.else
 END(\sym)
-.endif
 .endm
 
 	/* error code is on the stack already */
-.macro paranoiderrorentry sym do_sym entry=1
-.if \entry
-KPROBE_ENTRY(\sym)
-.else
+.macro paranoiderrorentry sym do_sym
 ENTRY(\sym)
-.endif
 	XCPT_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	subq $15*8,%rsp
@@ -1083,166 +1071,23 @@ ENTRY(\sym)
 	call \do_sym
 	jmp paranoid_exit		/* %ebx: no swapgs flag */
 	CFI_ENDPROC
-.if \entry
-KPROBE_END(\sym)
-.else
 END(\sym)
-.endif
 .endm
 
 zeroentry divide_error do_divide_error
-paranoidzeroentry_ist debug do_debug DEBUG_STACK
-paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
 zeroentry overflow do_overflow
 zeroentry bounds do_bounds
 zeroentry invalid_op do_invalid_op
 zeroentry device_not_available do_device_not_available
-paranoiderrorentry double_fault do_double_fault 0
+paranoiderrorentry double_fault do_double_fault
 zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
 errorentry invalid_TSS do_invalid_TSS
 errorentry segment_not_present do_segment_not_present
-paranoiderrorentry stack_segment do_stack_segment
-errorentry general_protection do_general_protection 1
-errorentry page_fault do_page_fault 1
 zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
 zeroentry coprocessor_error do_coprocessor_error
 errorentry alignment_check do_alignment_check
-#ifdef CONFIG_X86_MCE
-paranoidzeroentry machine_check do_machine_check
-#endif
 zeroentry simd_coprocessor_error do_simd_coprocessor_error
 
-	/*
- 	 * "Paranoid" exit path from exception stack.
-  	 * Paranoid because this is used by NMIs and cannot take
-	 * any kernel state for granted.
-	 * We don't do kernel preemption checks here, because only
-	 * NMI should be common and it does not enable IRQs and
-	 * cannot get reschedule ticks.
-	 *
-	 * "trace" is 0 for the NMI handler only, because irq-tracing
-	 * is fundamentally NMI-unsafe. (we cannot change the soft and
-	 * hard flags at once, atomically)
-	 */
-
-	/* ebx:	no swapgs flag */
-KPROBE_ENTRY(paranoid_exit)
-	INTR_FRAME
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	testl %ebx,%ebx				/* swapgs needed? */
-	jnz paranoid_restore
-	testl $3,CS(%rsp)
-	jnz   paranoid_userspace
-paranoid_swapgs:
-	TRACE_IRQS_IRETQ 0
-	SWAPGS_UNSAFE_STACK
-paranoid_restore:
-	RESTORE_ALL 8
-	jmp irq_return
-paranoid_userspace:
-	GET_THREAD_INFO(%rcx)
-	movl TI_flags(%rcx),%ebx
-	andl $_TIF_WORK_MASK,%ebx
-	jz paranoid_swapgs
-	movq %rsp,%rdi			/* &pt_regs */
-	call sync_regs
-	movq %rax,%rsp			/* switch stack for scheduling */
-	testl $_TIF_NEED_RESCHED,%ebx
-	jnz paranoid_schedule
-	movl %ebx,%edx			/* arg3: thread flags */
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	xorl %esi,%esi 			/* arg2: oldset */
-	movq %rsp,%rdi 			/* arg1: &pt_regs */
-	call do_notify_resume
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	jmp paranoid_userspace
-paranoid_schedule:
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_ANY)
-	call schedule
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	TRACE_IRQS_OFF
-	jmp paranoid_userspace
-	CFI_ENDPROC
-KPROBE_END(paranoid_exit)
-
-/*
- * Exception entry point. This expects an error code/orig_rax on the stack.
- * returns in "no swapgs flag" in %ebx.
- */
-KPROBE_ENTRY(error_entry)
-	XCPT_FRAME
-	CFI_ADJUST_CFA_OFFSET 15*8
-	/* oldrax contains error code */
-	cld
-	movq_cfi rdi, RDI+8
-	movq_cfi rsi, RSI+8
-	movq_cfi rdx, RDX+8
-	movq_cfi rcx, RCX+8
-	movq_cfi rax, RAX+8
-	movq_cfi  r8,  R8+8
-	movq_cfi  r9,  R9+8
-	movq_cfi r10, R10+8
-	movq_cfi r11, R11+8
-	movq_cfi rbx, RBX+8
-	movq_cfi rbp, RBP+8
-	movq_cfi r12, R12+8
-	movq_cfi r13, R13+8
-	movq_cfi r14, R14+8
-	movq_cfi r15, R15+8
-	xorl %ebx,%ebx
-	testl $3,CS+8(%rsp)
-	je error_kernelspace
-error_swapgs:
-	SWAPGS
-error_sti:
-	TRACE_IRQS_OFF
-	ret
-	CFI_ENDPROC
-
-/*
- * There are two places in the kernel that can potentially fault with
- * usergs. Handle them here. The exception handlers after iret run with
- * kernel gs again, so don't set the user space flag. B stepping K8s
- * sometimes report an truncated RIP for IRET exceptions returning to
- * compat mode. Check for these here too.
- */
-error_kernelspace:
-	incl %ebx
-	leaq irq_return(%rip),%rcx
-	cmpq %rcx,RIP+8(%rsp)
-	je error_swapgs
-	movl %ecx,%ecx	/* zero extend */
-	cmpq %rcx,RIP+8(%rsp)
-	je error_swapgs
-	cmpq $gs_change,RIP+8(%rsp)
-        je error_swapgs
-	jmp error_sti
-KPROBE_END(error_entry)
-
-
-/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
-KPROBE_ENTRY(error_exit)
-	DEFAULT_FRAME
-	movl %ebx,%eax
-	RESTORE_REST
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	GET_THREAD_INFO(%rcx)
-	testl %eax,%eax
-	jne retint_kernel
-	LOCKDEP_SYS_EXIT_IRQ
-	movl TI_flags(%rcx),%edx
-	movl $_TIF_WORK_MASK,%edi
-	andl %edi,%edx
-	jnz retint_careful
-	jmp retint_swapgs
-	CFI_ENDPROC
-KPROBE_END(error_exit)
-
        /* Reload gs selector with exception handling */
        /* edi:  new selector */
 ENTRY(native_load_gs_index)
@@ -1362,61 +1207,6 @@ ENTRY(kernel_execve)
 	CFI_ENDPROC
 END(kernel_execve)
 
-	/* runs on exception stack */
-KPROBE_ENTRY(nmi)
-	INTR_FRAME
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	pushq_cfi $-1
-	subq $15*8, %rsp
-	CFI_ADJUST_CFA_OFFSET 15*8
-	call save_paranoid
-	DEFAULT_FRAME 0
-	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
-	movq %rsp,%rdi
-	movq $-1,%rsi
-	call do_nmi
-#ifdef CONFIG_TRACE_IRQFLAGS
-	/* paranoidexit; without TRACE_IRQS_OFF */
-	/* ebx:	no swapgs flag */
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	testl %ebx,%ebx				/* swapgs needed? */
-	jnz nmi_restore
-	testl $3,CS(%rsp)
-	jnz nmi_userspace
-nmi_swapgs:
-	SWAPGS_UNSAFE_STACK
-nmi_restore:
-	RESTORE_ALL 8
-	jmp irq_return
-nmi_userspace:
-	GET_THREAD_INFO(%rcx)
-	movl TI_flags(%rcx),%ebx
-	andl $_TIF_WORK_MASK,%ebx
-	jz nmi_swapgs
-	movq %rsp,%rdi			/* &pt_regs */
-	call sync_regs
-	movq %rax,%rsp			/* switch stack for scheduling */
-	testl $_TIF_NEED_RESCHED,%ebx
-	jnz nmi_schedule
-	movl %ebx,%edx			/* arg3: thread flags */
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	xorl %esi,%esi 			/* arg2: oldset */
-	movq %rsp,%rdi 			/* arg1: &pt_regs */
-	call do_notify_resume
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	jmp nmi_userspace
-nmi_schedule:
-	ENABLE_INTERRUPTS(CLBR_ANY)
-	call schedule
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	jmp nmi_userspace
-	CFI_ENDPROC
-#else
-	jmp paranoid_exit
- 	CFI_ENDPROC
-#endif
-KPROBE_END(nmi)
-
 /* Call softirq on interrupt stack. Interrupts are off. */
 ENTRY(call_softirq)
 	CFI_STARTPROC
@@ -1437,13 +1227,6 @@ ENTRY(call_softirq)
 	CFI_ENDPROC
 END(call_softirq)
 
-KPROBE_ENTRY(ignore_sysret)
-	CFI_STARTPROC
-	mov $-ENOSYS,%eax
-	sysret
-	CFI_ENDPROC
-KPROBE_END(ignore_sysret)
-
 #ifdef CONFIG_XEN
 zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
 
@@ -1540,3 +1323,216 @@ ENTRY(xen_failsafe_callback)
 END(xen_failsafe_callback)
 
 #endif /* CONFIG_XEN */
+
+/*
+ * Some functions should be protected against kprobes
+ */
+	.pushsection .kprobes.text, "ax"
+
+paranoidzeroentry_ist debug do_debug DEBUG_STACK
+paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
+paranoiderrorentry stack_segment do_stack_segment
+errorentry general_protection do_general_protection
+errorentry page_fault do_page_fault
+#ifdef CONFIG_X86_MCE
+paranoidzeroentry machine_check do_machine_check
+#endif
+
+	/*
+ 	 * "Paranoid" exit path from exception stack.
+  	 * Paranoid because this is used by NMIs and cannot take
+	 * any kernel state for granted.
+	 * We don't do kernel preemption checks here, because only
+	 * NMI should be common and it does not enable IRQs and
+	 * cannot get reschedule ticks.
+	 *
+	 * "trace" is 0 for the NMI handler only, because irq-tracing
+	 * is fundamentally NMI-unsafe. (we cannot change the soft and
+	 * hard flags at once, atomically)
+	 */
+
+	/* ebx:	no swapgs flag */
+ENTRY(paranoid_exit)
+	INTR_FRAME
+	DISABLE_INTERRUPTS(CLBR_NONE)
+	TRACE_IRQS_OFF
+	testl %ebx,%ebx				/* swapgs needed? */
+	jnz paranoid_restore
+	testl $3,CS(%rsp)
+	jnz   paranoid_userspace
+paranoid_swapgs:
+	TRACE_IRQS_IRETQ 0
+	SWAPGS_UNSAFE_STACK
+paranoid_restore:
+	RESTORE_ALL 8
+	jmp irq_return
+paranoid_userspace:
+	GET_THREAD_INFO(%rcx)
+	movl TI_flags(%rcx),%ebx
+	andl $_TIF_WORK_MASK,%ebx
+	jz paranoid_swapgs
+	movq %rsp,%rdi			/* &pt_regs */
+	call sync_regs
+	movq %rax,%rsp			/* switch stack for scheduling */
+	testl $_TIF_NEED_RESCHED,%ebx
+	jnz paranoid_schedule
+	movl %ebx,%edx			/* arg3: thread flags */
+	TRACE_IRQS_ON
+	ENABLE_INTERRUPTS(CLBR_NONE)
+	xorl %esi,%esi 			/* arg2: oldset */
+	movq %rsp,%rdi 			/* arg1: &pt_regs */
+	call do_notify_resume
+	DISABLE_INTERRUPTS(CLBR_NONE)
+	TRACE_IRQS_OFF
+	jmp paranoid_userspace
+paranoid_schedule:
+	TRACE_IRQS_ON
+	ENABLE_INTERRUPTS(CLBR_ANY)
+	call schedule
+	DISABLE_INTERRUPTS(CLBR_ANY)
+	TRACE_IRQS_OFF
+	jmp paranoid_userspace
+	CFI_ENDPROC
+END(paranoid_exit)
+
+/*
+ * Exception entry point. This expects an error code/orig_rax on the stack.
+ * returns in "no swapgs flag" in %ebx.
+ */
+ENTRY(error_entry)
+	XCPT_FRAME
+	CFI_ADJUST_CFA_OFFSET 15*8
+	/* oldrax contains error code */
+	cld
+	movq_cfi rdi, RDI+8
+	movq_cfi rsi, RSI+8
+	movq_cfi rdx, RDX+8
+	movq_cfi rcx, RCX+8
+	movq_cfi rax, RAX+8
+	movq_cfi  r8,  R8+8
+	movq_cfi  r9,  R9+8
+	movq_cfi r10, R10+8
+	movq_cfi r11, R11+8
+	movq_cfi rbx, RBX+8
+	movq_cfi rbp, RBP+8
+	movq_cfi r12, R12+8
+	movq_cfi r13, R13+8
+	movq_cfi r14, R14+8
+	movq_cfi r15, R15+8
+	xorl %ebx,%ebx
+	testl $3,CS+8(%rsp)
+	je error_kernelspace
+error_swapgs:
+	SWAPGS
+error_sti:
+	TRACE_IRQS_OFF
+	ret
+	CFI_ENDPROC
+
+/*
+ * There are two places in the kernel that can potentially fault with
+ * usergs. Handle them here. The exception handlers after iret run with
+ * kernel gs again, so don't set the user space flag. B stepping K8s
+ * sometimes report an truncated RIP for IRET exceptions returning to
+ * compat mode. Check for these here too.
+ */
+error_kernelspace:
+	incl %ebx
+	leaq irq_return(%rip),%rcx
+	cmpq %rcx,RIP+8(%rsp)
+	je error_swapgs
+	movl %ecx,%ecx	/* zero extend */
+	cmpq %rcx,RIP+8(%rsp)
+	je error_swapgs
+	cmpq $gs_change,RIP+8(%rsp)
+        je error_swapgs
+	jmp error_sti
+END(error_entry)
+
+
+/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
+ENTRY(error_exit)
+	DEFAULT_FRAME
+	movl %ebx,%eax
+	RESTORE_REST
+	DISABLE_INTERRUPTS(CLBR_NONE)
+	TRACE_IRQS_OFF
+	GET_THREAD_INFO(%rcx)
+	testl %eax,%eax
+	jne retint_kernel
+	LOCKDEP_SYS_EXIT_IRQ
+	movl TI_flags(%rcx),%edx
+	movl $_TIF_WORK_MASK,%edi
+	andl %edi,%edx
+	jnz retint_careful
+	jmp retint_swapgs
+	CFI_ENDPROC
+END(error_exit)
+
+
+	/* runs on exception stack */
+ENTRY(nmi)
+	INTR_FRAME
+	PARAVIRT_ADJUST_EXCEPTION_FRAME
+	pushq_cfi $-1
+	subq $15*8, %rsp
+	CFI_ADJUST_CFA_OFFSET 15*8
+	call save_paranoid
+	DEFAULT_FRAME 0
+	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+	movq %rsp,%rdi
+	movq $-1,%rsi
+	call do_nmi
+#ifdef CONFIG_TRACE_IRQFLAGS
+	/* paranoidexit; without TRACE_IRQS_OFF */
+	/* ebx:	no swapgs flag */
+	DISABLE_INTERRUPTS(CLBR_NONE)
+	testl %ebx,%ebx				/* swapgs needed? */
+	jnz nmi_restore
+	testl $3,CS(%rsp)
+	jnz nmi_userspace
+nmi_swapgs:
+	SWAPGS_UNSAFE_STACK
+nmi_restore:
+	RESTORE_ALL 8
+	jmp irq_return
+nmi_userspace:
+	GET_THREAD_INFO(%rcx)
+	movl TI_flags(%rcx),%ebx
+	andl $_TIF_WORK_MASK,%ebx
+	jz nmi_swapgs
+	movq %rsp,%rdi			/* &pt_regs */
+	call sync_regs
+	movq %rax,%rsp			/* switch stack for scheduling */
+	testl $_TIF_NEED_RESCHED,%ebx
+	jnz nmi_schedule
+	movl %ebx,%edx			/* arg3: thread flags */
+	ENABLE_INTERRUPTS(CLBR_NONE)
+	xorl %esi,%esi 			/* arg2: oldset */
+	movq %rsp,%rdi 			/* arg1: &pt_regs */
+	call do_notify_resume
+	DISABLE_INTERRUPTS(CLBR_NONE)
+	jmp nmi_userspace
+nmi_schedule:
+	ENABLE_INTERRUPTS(CLBR_ANY)
+	call schedule
+	DISABLE_INTERRUPTS(CLBR_ANY)
+	jmp nmi_userspace
+	CFI_ENDPROC
+#else
+	jmp paranoid_exit
+ 	CFI_ENDPROC
+#endif
+END(nmi)
+
+ENTRY(ignore_sysret)
+	CFI_STARTPROC
+	mov $-ENOSYS,%eax
+	sysret
+	CFI_ENDPROC
+END(ignore_sysret)
+
+/*
+ * End of kprobes section
+ */
+	.popsection
-- 
cgit v1.2.3-70-g09d2


From d211af055d0c12dc3416c2886e6fbdc6eb74a381 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Mon, 24 Nov 2008 15:38:45 +0100
Subject: i386: get rid of the use of KPROBE_ENTRY / KPROBE_END

entry_32.S is now the only user of KPROBE_ENTRY / KPROBE_END,
treewide. This patch reorders entry_64.S and explicitly generates
a separate section for functions that need the protection. The
generated code before and after the patch is equal.

The KPROBE_ENTRY and KPROBE_END macro's are removed too.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_32.S | 438 +++++++++++++++++++++++----------------------
 include/linux/linkage.h    |   8 -
 2 files changed, 224 insertions(+), 222 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index bd02ec77edc4..6e96028d1a9c 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -688,65 +688,6 @@ ENDPROC(name)
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
 
-KPROBE_ENTRY(page_fault)
-	RING0_EC_FRAME
-	pushl $do_page_fault
-	CFI_ADJUST_CFA_OFFSET 4
-	ALIGN
-error_code:
-	/* the function address is in %fs's slot on the stack */
-	pushl %es
-	CFI_ADJUST_CFA_OFFSET 4
-	/*CFI_REL_OFFSET es, 0*/
-	pushl %ds
-	CFI_ADJUST_CFA_OFFSET 4
-	/*CFI_REL_OFFSET ds, 0*/
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET eax, 0
-	pushl %ebp
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET ebp, 0
-	pushl %edi
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET edi, 0
-	pushl %esi
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET esi, 0
-	pushl %edx
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET edx, 0
-	pushl %ecx
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET ecx, 0
-	pushl %ebx
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET ebx, 0
-	cld
-	pushl %fs
-	CFI_ADJUST_CFA_OFFSET 4
-	/*CFI_REL_OFFSET fs, 0*/
-	movl $(__KERNEL_PERCPU), %ecx
-	movl %ecx, %fs
-	UNWIND_ESPFIX_STACK
-	popl %ecx
-	CFI_ADJUST_CFA_OFFSET -4
-	/*CFI_REGISTER es, ecx*/
-	movl PT_FS(%esp), %edi		# get the function address
-	movl PT_ORIG_EAX(%esp), %edx	# get the error code
-	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
-	mov  %ecx, PT_FS(%esp)
-	/*CFI_REL_OFFSET fs, ES*/
-	movl $(__USER_DS), %ecx
-	movl %ecx, %ds
-	movl %ecx, %es
-	TRACE_IRQS_OFF
-	movl %esp,%eax			# pt_regs pointer
-	call *%edi
-	jmp ret_from_exception
-	CFI_ENDPROC
-KPROBE_END(page_fault)
-
 ENTRY(coprocessor_error)
 	RING0_INT_FRAME
 	pushl $0
@@ -777,140 +718,6 @@ ENTRY(device_not_available)
 	CFI_ENDPROC
 END(device_not_available)
 
-/*
- * Debug traps and NMI can happen at the one SYSENTER instruction
- * that sets up the real kernel stack. Check here, since we can't
- * allow the wrong stack to be used.
- *
- * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
- * already pushed 3 words if it hits on the sysenter instruction:
- * eflags, cs and eip.
- *
- * We just load the right stack, and push the three (known) values
- * by hand onto the new stack - while updating the return eip past
- * the instruction that would have done it for sysenter.
- */
-#define FIX_STACK(offset, ok, label)		\
-	cmpw $__KERNEL_CS,4(%esp);		\
-	jne ok;					\
-label:						\
-	movl TSS_sysenter_sp0+offset(%esp),%esp;	\
-	CFI_DEF_CFA esp, 0;			\
-	CFI_UNDEFINED eip;			\
-	pushfl;					\
-	CFI_ADJUST_CFA_OFFSET 4;		\
-	pushl $__KERNEL_CS;			\
-	CFI_ADJUST_CFA_OFFSET 4;		\
-	pushl $sysenter_past_esp;		\
-	CFI_ADJUST_CFA_OFFSET 4;		\
-	CFI_REL_OFFSET eip, 0
-
-KPROBE_ENTRY(debug)
-	RING0_INT_FRAME
-	cmpl $ia32_sysenter_target,(%esp)
-	jne debug_stack_correct
-	FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
-debug_stack_correct:
-	pushl $-1			# mark this as an int
-	CFI_ADJUST_CFA_OFFSET 4
-	SAVE_ALL
-	TRACE_IRQS_OFF
-	xorl %edx,%edx			# error code 0
-	movl %esp,%eax			# pt_regs pointer
-	call do_debug
-	jmp ret_from_exception
-	CFI_ENDPROC
-KPROBE_END(debug)
-
-/*
- * NMI is doubly nasty. It can happen _while_ we're handling
- * a debug fault, and the debug fault hasn't yet been able to
- * clear up the stack. So we first check whether we got  an
- * NMI on the sysenter entry path, but after that we need to
- * check whether we got an NMI on the debug path where the debug
- * fault happened on the sysenter path.
- */
-KPROBE_ENTRY(nmi)
-	RING0_INT_FRAME
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
-	movl %ss, %eax
-	cmpw $__ESPFIX_SS, %ax
-	popl %eax
-	CFI_ADJUST_CFA_OFFSET -4
-	je nmi_espfix_stack
-	cmpl $ia32_sysenter_target,(%esp)
-	je nmi_stack_fixup
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
-	movl %esp,%eax
-	/* Do not access memory above the end of our stack page,
-	 * it might not exist.
-	 */
-	andl $(THREAD_SIZE-1),%eax
-	cmpl $(THREAD_SIZE-20),%eax
-	popl %eax
-	CFI_ADJUST_CFA_OFFSET -4
-	jae nmi_stack_correct
-	cmpl $ia32_sysenter_target,12(%esp)
-	je nmi_debug_stack_check
-nmi_stack_correct:
-	/* We have a RING0_INT_FRAME here */
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
-	SAVE_ALL
-	TRACE_IRQS_OFF
-	xorl %edx,%edx		# zero error code
-	movl %esp,%eax		# pt_regs pointer
-	call do_nmi
-	jmp restore_nocheck_notrace
-	CFI_ENDPROC
-
-nmi_stack_fixup:
-	RING0_INT_FRAME
-	FIX_STACK(12,nmi_stack_correct, 1)
-	jmp nmi_stack_correct
-
-nmi_debug_stack_check:
-	/* We have a RING0_INT_FRAME here */
-	cmpw $__KERNEL_CS,16(%esp)
-	jne nmi_stack_correct
-	cmpl $debug,(%esp)
-	jb nmi_stack_correct
-	cmpl $debug_esp_fix_insn,(%esp)
-	ja nmi_stack_correct
-	FIX_STACK(24,nmi_stack_correct, 1)
-	jmp nmi_stack_correct
-
-nmi_espfix_stack:
-	/* We have a RING0_INT_FRAME here.
-	 *
-	 * create the pointer to lss back
-	 */
-	pushl %ss
-	CFI_ADJUST_CFA_OFFSET 4
-	pushl %esp
-	CFI_ADJUST_CFA_OFFSET 4
-	addw $4, (%esp)
-	/* copy the iret frame of 12 bytes */
-	.rept 3
-	pushl 16(%esp)
-	CFI_ADJUST_CFA_OFFSET 4
-	.endr
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
-	SAVE_ALL
-	TRACE_IRQS_OFF
-	FIXUP_ESPFIX_STACK		# %eax == %esp
-	xorl %edx,%edx			# zero error code
-	call do_nmi
-	RESTORE_REGS
-	lss 12+4(%esp), %esp		# back to espfix stack
-	CFI_ADJUST_CFA_OFFSET -24
-	jmp irq_return
-	CFI_ENDPROC
-KPROBE_END(nmi)
-
 #ifdef CONFIG_PARAVIRT
 ENTRY(native_iret)
 	iret
@@ -926,19 +733,6 @@ ENTRY(native_irq_enable_sysexit)
 END(native_irq_enable_sysexit)
 #endif
 
-KPROBE_ENTRY(int3)
-	RING0_INT_FRAME
-	pushl $-1			# mark this as an int
-	CFI_ADJUST_CFA_OFFSET 4
-	SAVE_ALL
-	TRACE_IRQS_OFF
-	xorl %edx,%edx		# zero error code
-	movl %esp,%eax		# pt_regs pointer
-	call do_int3
-	jmp ret_from_exception
-	CFI_ENDPROC
-KPROBE_END(int3)
-
 ENTRY(overflow)
 	RING0_INT_FRAME
 	pushl $0
@@ -1003,14 +797,6 @@ ENTRY(stack_segment)
 	CFI_ENDPROC
 END(stack_segment)
 
-KPROBE_ENTRY(general_protection)
-	RING0_EC_FRAME
-	pushl $do_general_protection
-	CFI_ADJUST_CFA_OFFSET 4
-	jmp error_code
-	CFI_ENDPROC
-KPROBE_END(general_protection)
-
 ENTRY(alignment_check)
 	RING0_EC_FRAME
 	pushl $do_alignment_check
@@ -1220,3 +1006,227 @@ END(mcount)
 #include "syscall_table_32.S"
 
 syscall_table_size=(.-sys_call_table)
+
+/*
+ * Some functions should be protected against kprobes
+ */
+	.pushsection .kprobes.text, "ax"
+
+ENTRY(page_fault)
+	RING0_EC_FRAME
+	pushl $do_page_fault
+	CFI_ADJUST_CFA_OFFSET 4
+	ALIGN
+error_code:
+	/* the function address is in %fs's slot on the stack */
+	pushl %es
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET es, 0*/
+	pushl %ds
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET ds, 0*/
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET eax, 0
+	pushl %ebp
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ebp, 0
+	pushl %edi
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET edi, 0
+	pushl %esi
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET esi, 0
+	pushl %edx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET edx, 0
+	pushl %ecx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ecx, 0
+	pushl %ebx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ebx, 0
+	cld
+	pushl %fs
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET fs, 0*/
+	movl $(__KERNEL_PERCPU), %ecx
+	movl %ecx, %fs
+	UNWIND_ESPFIX_STACK
+	popl %ecx
+	CFI_ADJUST_CFA_OFFSET -4
+	/*CFI_REGISTER es, ecx*/
+	movl PT_FS(%esp), %edi		# get the function address
+	movl PT_ORIG_EAX(%esp), %edx	# get the error code
+	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
+	mov  %ecx, PT_FS(%esp)
+	/*CFI_REL_OFFSET fs, ES*/
+	movl $(__USER_DS), %ecx
+	movl %ecx, %ds
+	movl %ecx, %es
+	TRACE_IRQS_OFF
+	movl %esp,%eax			# pt_regs pointer
+	call *%edi
+	jmp ret_from_exception
+	CFI_ENDPROC
+END(page_fault)
+
+/*
+ * Debug traps and NMI can happen at the one SYSENTER instruction
+ * that sets up the real kernel stack. Check here, since we can't
+ * allow the wrong stack to be used.
+ *
+ * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
+ * already pushed 3 words if it hits on the sysenter instruction:
+ * eflags, cs and eip.
+ *
+ * We just load the right stack, and push the three (known) values
+ * by hand onto the new stack - while updating the return eip past
+ * the instruction that would have done it for sysenter.
+ */
+#define FIX_STACK(offset, ok, label)		\
+	cmpw $__KERNEL_CS,4(%esp);		\
+	jne ok;					\
+label:						\
+	movl TSS_sysenter_sp0+offset(%esp),%esp;	\
+	CFI_DEF_CFA esp, 0;			\
+	CFI_UNDEFINED eip;			\
+	pushfl;					\
+	CFI_ADJUST_CFA_OFFSET 4;		\
+	pushl $__KERNEL_CS;			\
+	CFI_ADJUST_CFA_OFFSET 4;		\
+	pushl $sysenter_past_esp;		\
+	CFI_ADJUST_CFA_OFFSET 4;		\
+	CFI_REL_OFFSET eip, 0
+
+ENTRY(debug)
+	RING0_INT_FRAME
+	cmpl $ia32_sysenter_target,(%esp)
+	jne debug_stack_correct
+	FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
+debug_stack_correct:
+	pushl $-1			# mark this as an int
+	CFI_ADJUST_CFA_OFFSET 4
+	SAVE_ALL
+	TRACE_IRQS_OFF
+	xorl %edx,%edx			# error code 0
+	movl %esp,%eax			# pt_regs pointer
+	call do_debug
+	jmp ret_from_exception
+	CFI_ENDPROC
+END(debug)
+
+/*
+ * NMI is doubly nasty. It can happen _while_ we're handling
+ * a debug fault, and the debug fault hasn't yet been able to
+ * clear up the stack. So we first check whether we got  an
+ * NMI on the sysenter entry path, but after that we need to
+ * check whether we got an NMI on the debug path where the debug
+ * fault happened on the sysenter path.
+ */
+ENTRY(nmi)
+	RING0_INT_FRAME
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	movl %ss, %eax
+	cmpw $__ESPFIX_SS, %ax
+	popl %eax
+	CFI_ADJUST_CFA_OFFSET -4
+	je nmi_espfix_stack
+	cmpl $ia32_sysenter_target,(%esp)
+	je nmi_stack_fixup
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	movl %esp,%eax
+	/* Do not access memory above the end of our stack page,
+	 * it might not exist.
+	 */
+	andl $(THREAD_SIZE-1),%eax
+	cmpl $(THREAD_SIZE-20),%eax
+	popl %eax
+	CFI_ADJUST_CFA_OFFSET -4
+	jae nmi_stack_correct
+	cmpl $ia32_sysenter_target,12(%esp)
+	je nmi_debug_stack_check
+nmi_stack_correct:
+	/* We have a RING0_INT_FRAME here */
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	SAVE_ALL
+	TRACE_IRQS_OFF
+	xorl %edx,%edx		# zero error code
+	movl %esp,%eax		# pt_regs pointer
+	call do_nmi
+	jmp restore_nocheck_notrace
+	CFI_ENDPROC
+
+nmi_stack_fixup:
+	RING0_INT_FRAME
+	FIX_STACK(12,nmi_stack_correct, 1)
+	jmp nmi_stack_correct
+
+nmi_debug_stack_check:
+	/* We have a RING0_INT_FRAME here */
+	cmpw $__KERNEL_CS,16(%esp)
+	jne nmi_stack_correct
+	cmpl $debug,(%esp)
+	jb nmi_stack_correct
+	cmpl $debug_esp_fix_insn,(%esp)
+	ja nmi_stack_correct
+	FIX_STACK(24,nmi_stack_correct, 1)
+	jmp nmi_stack_correct
+
+nmi_espfix_stack:
+	/* We have a RING0_INT_FRAME here.
+	 *
+	 * create the pointer to lss back
+	 */
+	pushl %ss
+	CFI_ADJUST_CFA_OFFSET 4
+	pushl %esp
+	CFI_ADJUST_CFA_OFFSET 4
+	addw $4, (%esp)
+	/* copy the iret frame of 12 bytes */
+	.rept 3
+	pushl 16(%esp)
+	CFI_ADJUST_CFA_OFFSET 4
+	.endr
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	SAVE_ALL
+	TRACE_IRQS_OFF
+	FIXUP_ESPFIX_STACK		# %eax == %esp
+	xorl %edx,%edx			# zero error code
+	call do_nmi
+	RESTORE_REGS
+	lss 12+4(%esp), %esp		# back to espfix stack
+	CFI_ADJUST_CFA_OFFSET -24
+	jmp irq_return
+	CFI_ENDPROC
+END(nmi)
+
+ENTRY(int3)
+	RING0_INT_FRAME
+	pushl $-1			# mark this as an int
+	CFI_ADJUST_CFA_OFFSET 4
+	SAVE_ALL
+	TRACE_IRQS_OFF
+	xorl %edx,%edx		# zero error code
+	movl %esp,%eax		# pt_regs pointer
+	call do_int3
+	jmp ret_from_exception
+	CFI_ENDPROC
+END(int3)
+
+ENTRY(general_protection)
+	RING0_EC_FRAME
+	pushl $do_general_protection
+	CFI_ADJUST_CFA_OFFSET 4
+	jmp error_code
+	CFI_ENDPROC
+END(general_protection)
+
+/*
+ * End of kprobes section
+ */
+	.popsection
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 9fd1f859021b..fee9e59649c1 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -64,14 +64,6 @@
 	name:
 #endif
 
-#define KPROBE_ENTRY(name) \
-  .pushsection .kprobes.text, "ax"; \
-  ENTRY(name)
-
-#define KPROBE_END(name) \
-  END(name);		 \
-  .popsection
-
 #ifndef END
 #define END(name) \
   .size name, .-name
-- 
cgit v1.2.3-70-g09d2


From 33454539f386a2beb38269bea5fff82b3d56b0e9 Mon Sep 17 00:00:00 2001
From: "gorcunov@gmail.com" <gorcunov@gmail.com>
Date: Wed, 26 Nov 2008 22:17:02 +0300
Subject: x86: entry_64.S - use X86_EFLAGS_IF instead of hardcoded number

Impact: cleanup

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Acked-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 38fcd0517c31..1c309d546518 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -194,7 +194,7 @@ ENTRY(native_usergs_sysret64)
 	pushq %rax /* rsp */
 	CFI_ADJUST_CFA_OFFSET	8
 	CFI_REL_OFFSET	rsp,0
-	pushq $(1<<9) /* eflags - interrupts on */
+	pushq $X86_EFLAGS_IF /* eflags - interrupts on */
 	CFI_ADJUST_CFA_OFFSET	8
 	/*CFI_REL_OFFSET	rflags,0*/
 	pushq $__KERNEL_CS /* cs */
-- 
cgit v1.2.3-70-g09d2


From c2c631e318091118587f3b766347d259c9265b8b Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Wed, 26 Nov 2008 22:17:00 +0300
Subject: x86: entry_64.S - use ENTRY to define child_rip

child_rip is called not by its name but indirectly
rather so make it global and aligned.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Acked-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1c309d546518..0a910a7f85f5 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1160,7 +1160,7 @@ ENTRY(kernel_thread)
 	CFI_ENDPROC
 END(kernel_thread)
 
-child_rip:
+ENTRY(child_rip)
 	pushq $0		# fake return address
 	CFI_STARTPROC
 	/*
-- 
cgit v1.2.3-70-g09d2


From 1d9b16d1690fe5edb1c907fe4746681cf026cdf3 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 27 Nov 2008 18:39:15 +0100
Subject: x86: move GART specific stuff from iommu.h to gart.h

Impact: cleanup

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/gart.h      | 33 +++++++++++++++++++++++++++++++++
 arch/x86/include/asm/iommu.h     | 33 ---------------------------------
 arch/x86/kernel/amd_iommu.c      |  1 +
 arch/x86/kernel/amd_iommu_init.c |  1 +
 arch/x86/kernel/early-quirks.c   |  1 +
 arch/x86/kernel/pci-dma.c        |  1 +
 arch/x86/kernel/setup.c          |  1 +
 7 files changed, 38 insertions(+), 33 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
index 74252264433d..6cfdafa409d8 100644
--- a/arch/x86/include/asm/gart.h
+++ b/arch/x86/include/asm/gart.h
@@ -29,6 +29,39 @@ extern int fix_aperture;
 #define AMD64_GARTCACHECTL	0x9c
 #define AMD64_GARTEN		(1<<0)
 
+#ifdef CONFIG_GART_IOMMU
+extern int gart_iommu_aperture;
+extern int gart_iommu_aperture_allowed;
+extern int gart_iommu_aperture_disabled;
+
+extern void early_gart_iommu_check(void);
+extern void gart_iommu_init(void);
+extern void gart_iommu_shutdown(void);
+extern void __init gart_parse_options(char *);
+extern void gart_iommu_hole_init(void);
+
+#else
+#define gart_iommu_aperture            0
+#define gart_iommu_aperture_allowed    0
+#define gart_iommu_aperture_disabled   1
+
+static inline void early_gart_iommu_check(void)
+{
+}
+static inline void gart_iommu_init(void)
+{
+}
+static inline void gart_iommu_shutdown(void)
+{
+}
+static inline void gart_parse_options(char *options)
+{
+}
+static inline void gart_iommu_hole_init(void)
+{
+}
+#endif
+
 extern int agp_amd64_init(void);
 
 static inline void enable_gart_translation(struct pci_dev *dev, u64 addr)
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 0b500c5b6446..295b13193f4d 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -12,37 +12,4 @@ extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len);
 /* 10 seconds */
 #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
 
-#ifdef CONFIG_GART_IOMMU
-extern int gart_iommu_aperture;
-extern int gart_iommu_aperture_allowed;
-extern int gart_iommu_aperture_disabled;
-
-extern void early_gart_iommu_check(void);
-extern void gart_iommu_init(void);
-extern void gart_iommu_shutdown(void);
-extern void __init gart_parse_options(char *);
-extern void gart_iommu_hole_init(void);
-
-#else
-#define gart_iommu_aperture            0
-#define gart_iommu_aperture_allowed    0
-#define gart_iommu_aperture_disabled   1
-
-static inline void early_gart_iommu_check(void)
-{
-}
-static inline void gart_iommu_init(void)
-{
-}
-static inline void gart_iommu_shutdown(void)
-{
-}
-static inline void gart_parse_options(char *options)
-{
-}
-static inline void gart_iommu_hole_init(void)
-{
-}
-#endif
-
 #endif /* _ASM_X86_IOMMU_H */
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 331b318304eb..172e0dc4641e 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -24,6 +24,7 @@
 #include <linux/iommu-helper.h>
 #include <asm/proto.h>
 #include <asm/iommu.h>
+#include <asm/gart.h>
 #include <asm/amd_iommu_types.h>
 #include <asm/amd_iommu.h>
 
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 0cdcda35a05f..7685f0774a8f 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -28,6 +28,7 @@
 #include <asm/amd_iommu_types.h>
 #include <asm/amd_iommu.h>
 #include <asm/iommu.h>
+#include <asm/gart.h>
 
 /*
  * definitions for the ACPI scanning code
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 1b894b72c0f5..744aa7fc49d5 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -17,6 +17,7 @@
 #include <asm/io_apic.h>
 #include <asm/apic.h>
 #include <asm/iommu.h>
+#include <asm/gart.h>
 
 static void __init fix_hypertransport_config(int num, int slot, int func)
 {
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 192624820217..12eeb4bfcdeb 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -6,6 +6,7 @@
 #include <asm/proto.h>
 #include <asm/dma.h>
 #include <asm/iommu.h>
+#include <asm/gart.h>
 #include <asm/calgary.h>
 #include <asm/amd_iommu.h>
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0fa6790c1dd3..67d5979e654e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -93,6 +93,7 @@
 #include <asm/desc.h>
 #include <asm/dma.h>
 #include <asm/iommu.h>
+#include <asm/gart.h>
 #include <asm/mmu_context.h>
 #include <asm/proto.h>
 
-- 
cgit v1.2.3-70-g09d2


From 5ae3a139cf4fc2349f1dfa1993a66c1dcc119468 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Thu, 27 Nov 2008 00:02:10 +0300
Subject: x86: uv bau interrupt -- use proper interrupt number

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Acked-by: Cliff Wickman <cpw@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 0a910a7f85f5..57d7f7a5ad2f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -938,7 +938,7 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
 	irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
 #endif
 
-apicinterrupt 220 \
+apicinterrupt UV_BAU_MESSAGE \
 	uv_bau_message_intr1 uv_bau_message_interrupt
 apicinterrupt LOCAL_TIMER_VECTOR \
 	apic_timer_interrupt smp_apic_timer_interrupt
-- 
cgit v1.2.3-70-g09d2


From 9f1e87ea3ecb3c46c21f6a1a202ec82f99ed2473 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Thu, 27 Nov 2008 21:10:08 +0300
Subject: x86: entry_64.S - trivial: space, comments fixup

Impact: cleanup

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 94 +++++++++++++++++++++++-----------------------
 1 file changed, 48 insertions(+), 46 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 57d7f7a5ad2f..08c0c9777a09 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1020,7 +1020,7 @@ END(\sym)
 
 .macro paranoidzeroentry_ist sym do_sym ist
 ENTRY(\sym)
- 	INTR_FRAME
+	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	pushq $-1		/* ORIG_RAX: no syscall to restart */
 	CFI_ADJUST_CFA_OFFSET 8
@@ -1088,36 +1088,36 @@ zeroentry coprocessor_error do_coprocessor_error
 errorentry alignment_check do_alignment_check
 zeroentry simd_coprocessor_error do_simd_coprocessor_error
 
-       /* Reload gs selector with exception handling */
-       /* edi:  new selector */
+	/* Reload gs selector with exception handling */
+	/* edi:  new selector */
 ENTRY(native_load_gs_index)
 	CFI_STARTPROC
 	pushf
 	CFI_ADJUST_CFA_OFFSET 8
 	DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
-        SWAPGS
+	SWAPGS
 gs_change:
-        movl %edi,%gs
+	movl %edi,%gs
 2:	mfence		/* workaround */
 	SWAPGS
-        popf
+	popf
 	CFI_ADJUST_CFA_OFFSET -8
-        ret
+	ret
 	CFI_ENDPROC
 END(native_load_gs_index)
 
-        .section __ex_table,"a"
-        .align 8
-        .quad gs_change,bad_gs
-        .previous
-        .section .fixup,"ax"
+	.section __ex_table,"a"
+	.align 8
+	.quad gs_change,bad_gs
+	.previous
+	.section .fixup,"ax"
 	/* running with kernelgs */
 bad_gs:
 	SWAPGS			/* switch back to user gs */
 	xorl %eax,%eax
-        movl %eax,%gs
-        jmp  2b
-        .previous
+	movl %eax,%gs
+	jmp  2b
+	.previous
 
 /*
  * Create a kernel thread.
@@ -1152,7 +1152,7 @@ ENTRY(kernel_thread)
 	 * so internally to the x86_64 port you can rely on kernel_thread()
 	 * not to reschedule the child before returning, this avoids the need
 	 * of hacks for example to fork off the per-CPU idle tasks.
-         * [Hopefully no generic code relies on the reschedule -AK]
+	 * [Hopefully no generic code relies on the reschedule -AK]
 	 */
 	RESTORE_ALL
 	UNFAKE_STACK_FRAME
@@ -1231,22 +1231,24 @@ END(call_softirq)
 zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
 
 /*
-# A note on the "critical region" in our callback handler.
-# We want to avoid stacking callback handlers due to events occurring
-# during handling of the last event. To do this, we keep events disabled
-# until we've done all processing. HOWEVER, we must enable events before
-# popping the stack frame (can't be done atomically) and so it would still
-# be possible to get enough handler activations to overflow the stack.
-# Although unlikely, bugs of that kind are hard to track down, so we'd
-# like to avoid the possibility.
-# So, on entry to the handler we detect whether we interrupted an
-# existing activation in its critical region -- if so, we pop the current
-# activation and restart the handler using the previous one.
-*/
+ * A note on the "critical region" in our callback handler.
+ * We want to avoid stacking callback handlers due to events occurring
+ * during handling of the last event. To do this, we keep events disabled
+ * until we've done all processing. HOWEVER, we must enable events before
+ * popping the stack frame (can't be done atomically) and so it would still
+ * be possible to get enough handler activations to overflow the stack.
+ * Although unlikely, bugs of that kind are hard to track down, so we'd
+ * like to avoid the possibility.
+ * So, on entry to the handler we detect whether we interrupted an
+ * existing activation in its critical region -- if so, we pop the current
+ * activation and restart the handler using the previous one.
+ */
 ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
 	CFI_STARTPROC
-/* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
-   see the correct pointer to the pt_regs */
+/*
+ * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
+ * see the correct pointer to the pt_regs
+ */
 	movq %rdi, %rsp            # we don't return, adjust the stack frame
 	CFI_ENDPROC
 	DEFAULT_FRAME
@@ -1264,18 +1266,18 @@ ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
 END(do_hypervisor_callback)
 
 /*
-# Hypervisor uses this for application faults while it executes.
-# We get here for two reasons:
-#  1. Fault while reloading DS, ES, FS or GS
-#  2. Fault while executing IRET
-# Category 1 we do not need to fix up as Xen has already reloaded all segment
-# registers that could be reloaded and zeroed the others.
-# Category 2 we fix up by killing the current process. We cannot use the
-# normal Linux return path in this case because if we use the IRET hypercall
-# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
-# We distinguish between categories by comparing each saved segment register
-# with its current contents: any discrepancy means we in category 1.
-*/
+ * Hypervisor uses this for application faults while it executes.
+ * We get here for two reasons:
+ *  1. Fault while reloading DS, ES, FS or GS
+ *  2. Fault while executing IRET
+ * Category 1 we do not need to fix up as Xen has already reloaded all segment
+ * registers that could be reloaded and zeroed the others.
+ * Category 2 we fix up by killing the current process. We cannot use the
+ * normal Linux return path in this case because if we use the IRET hypercall
+ * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
+ * We distinguish between categories by comparing each saved segment register
+ * with its current contents: any discrepancy means we in category 1.
+ */
 ENTRY(xen_failsafe_callback)
 	INTR_FRAME 1 (6*8)
 	/*CFI_REL_OFFSET gs,GS*/
@@ -1339,8 +1341,8 @@ paranoidzeroentry machine_check do_machine_check
 #endif
 
 	/*
- 	 * "Paranoid" exit path from exception stack.
-  	 * Paranoid because this is used by NMIs and cannot take
+	 * "Paranoid" exit path from exception stack.
+	 * Paranoid because this is used by NMIs and cannot take
 	 * any kernel state for granted.
 	 * We don't do kernel preemption checks here, because only
 	 * NMI should be common and it does not enable IRQs and
@@ -1445,7 +1447,7 @@ error_kernelspace:
 	cmpq %rcx,RIP+8(%rsp)
 	je error_swapgs
 	cmpq $gs_change,RIP+8(%rsp)
-        je error_swapgs
+	je error_swapgs
 	jmp error_sti
 END(error_entry)
 
@@ -1521,7 +1523,7 @@ nmi_schedule:
 	CFI_ENDPROC
 #else
 	jmp paranoid_exit
- 	CFI_ENDPROC
+	CFI_ENDPROC
 #endif
 END(nmi)
 
-- 
cgit v1.2.3-70-g09d2


From 5b3eec0c80038c8739ccd465b897a35c0dff1cc4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 27 Nov 2008 14:41:21 +0100
Subject: x86: ret_from_fork - get rid of jump back

Impact: remove dead code

If we take a closer look at the rff_trace/rff_action ret_from_fork code,
we have to realize that it does all the wrong things: for example it
checks the TIF flag - while later on jumping back to the ret-from-syscall
path - duplicating the check needlessly.

But checking for _TIF_SYSCALL_TRACE is completely unnecessary here because
we clear that flag for every freshly forked task. So the whole "tracing"
code here, for which there is a out of line jump optimization that makes
it even harder to read, is in reality completely dead code ...

Reported-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Tested-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 arch/x86/kernel/entry_64.S | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e41734a537bd..3194636a4293 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -361,34 +361,35 @@ ENTRY(save_paranoid)
 END(save_paranoid)
 
 /*
- * A newly forked process directly context switches into this.
+ * A newly forked process directly context switches into this address.
+ *
+ * rdi: prev task we switched from
  */
-/* rdi:	prev */
 ENTRY(ret_from_fork)
 	DEFAULT_FRAME
+
 	push kernel_eflags(%rip)
 	CFI_ADJUST_CFA_OFFSET 8
-	popf				# reset kernel eflags
+	popf					# reset kernel eflags
 	CFI_ADJUST_CFA_OFFSET -8
-	call schedule_tail
+
+	call schedule_tail			# rdi: 'prev' task parameter
+
 	GET_THREAD_INFO(%rcx)
-	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
+
 	CFI_REMEMBER_STATE
-	jnz rff_trace
-rff_action:
 	RESTORE_REST
-	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
+
+	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
 	je   int_ret_from_sys_call
-	testl $_TIF_IA32,TI_flags(%rcx)
+
+	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
 	jnz  int_ret_from_sys_call
+
 	RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
-	jmp ret_from_sys_call
+	jmp ret_from_sys_call			# go to the SYSRET fastpath
+
 	CFI_RESTORE_STATE
-rff_trace:
-	movq %rsp,%rdi
-	call syscall_trace_leave
-	GET_THREAD_INFO(%rcx)
-	jmp rff_action
 	CFI_ENDPROC
 END(ret_from_fork)
 
-- 
cgit v1.2.3-70-g09d2


From 8caac56305cef98f9357b060a77939d17699937d Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@suse.cz>
Date: Wed, 26 Nov 2008 17:15:27 +0100
Subject: aperture_64.c: clarify that too small aperture is valid reason for
 this code

Impact: update comment

Clarify that too small aperture is valid reason for this code.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/aperture_64.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 9a32b37ee2ee..676debfc1702 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -1,8 +1,9 @@
 /*
  * Firmware replacement code.
  *
- * Work around broken BIOSes that don't set an aperture or only set the
- * aperture in the AGP bridge.
+ * Work around broken BIOSes that don't set an aperture, only set the
+ * aperture in the AGP bridge, or set too small aperture.
+ *
  * If all fails map the aperture over some low memory.  This is cheaper than
  * doing bounce buffering. The memory is lost. This is done at early boot
  * because only the bootmem allocator can allocate 32+MB.
-- 
cgit v1.2.3-70-g09d2


From 4385cecf1f5866fb33fc95e2ee26a44e9b6f6be2 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Sat, 29 Nov 2008 22:33:16 +0100
Subject: x86: intel_cacheinfo, minor show_type cleanup

Impact: cleanup

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/intel_cacheinfo.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 3f46afbb1cf1..68b5d8681cbb 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -644,20 +644,17 @@ static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
 	return show_shared_cpu_map_func(leaf, 1, buf);
 }
 
-static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
-	switch(this_leaf->eax.split.type) {
-	    case CACHE_TYPE_DATA:
+static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
+{
+	switch (this_leaf->eax.split.type) {
+	case CACHE_TYPE_DATA:
 		return sprintf(buf, "Data\n");
-		break;
-	    case CACHE_TYPE_INST:
+	case CACHE_TYPE_INST:
 		return sprintf(buf, "Instruction\n");
-		break;
-	    case CACHE_TYPE_UNIFIED:
+	case CACHE_TYPE_UNIFIED:
 		return sprintf(buf, "Unified\n");
-		break;
-	    default:
+	default:
 		return sprintf(buf, "Unknown\n");
-		break;
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From 2c5643b1c5c7fbb13f340d4c58944d9642f41796 Mon Sep 17 00:00:00 2001
From: Hitoshi Mitake <h.mitake@gmail.com>
Date: Sun, 30 Nov 2008 17:16:04 +0900
Subject: x86: provide readq()/writeq() on 32-bit too

Impact: add new API for drivers

Add implementation of readq/writeq to x86_32, and add config value to
the x86 architecture to determine existence of readq/writeq.

Signed-off-by: Hitoshi Mitake <h.mitake@gmail.com>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig          |  2 ++
 arch/x86/include/asm/io.h | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac22bb7719f7..a7d50f5d118c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -19,6 +19,8 @@ config X86_64
 config X86
 	def_bool y
 	select HAVE_AOUT if X86_32
+	select HAVE_READQ
+	select HAVE_WRITEQ
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_IDE
 	select HAVE_OPROFILE
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index ac2abc88cd95..25946449df4f 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -4,6 +4,7 @@
 #define ARCH_HAS_IOREMAP_WC
 
 #include <linux/compiler.h>
+#include <asm-generic/int-ll64.h>
 
 #define build_mmio_read(name, size, type, reg, barrier) \
 static inline type name(const volatile void __iomem *addr) \
@@ -57,6 +58,29 @@ build_mmio_write(__writeq, "q", unsigned long, "r", )
 /* Let people know we have them */
 #define readq readq
 #define writeq writeq
+
+#else  /* CONFIG_X86_32 from here */
+
+static inline __u64 readq(const volatile void __iomem *addr)
+{
+	const volatile u32 __iomem *p = addr;
+	u32 l, h;
+
+	l = readl(p);
+	h = readl(p + 1);
+
+	return l + ((u64)h << 32);
+}
+
+static inline void writeq(__u64 val, volatile void __iomem *addr)
+{
+	writel(val, addr);
+	writel(val >> 32, addr+4);
+}
+
+#define readq		readq
+#define writeq		writeq
+
 #endif
 
 extern int iommu_bio_merge;
-- 
cgit v1.2.3-70-g09d2


From a0b1131e479e5af32eefac8bc54c9742e23d638e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 30 Nov 2008 09:33:55 +0100
Subject: x86: provide readq()/writeq() on 32-bit too, cleanup

Impact: cleanup

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/io.h | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 25946449df4f..3ccfaf610c89 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -55,21 +55,17 @@ build_mmio_write(__writeq, "q", unsigned long, "r", )
 #define __raw_readq __readq
 #define __raw_writeq writeq
 
-/* Let people know we have them */
-#define readq readq
-#define writeq writeq
-
 #else  /* CONFIG_X86_32 from here */
 
 static inline __u64 readq(const volatile void __iomem *addr)
 {
 	const volatile u32 __iomem *p = addr;
-	u32 l, h;
+	u32 low, high;
 
-	l = readl(p);
-	h = readl(p + 1);
+	low = readl(p);
+	high = readl(p + 1);
 
-	return l + ((u64)h << 32);
+	return low + ((u64)high << 32);
 }
 
 static inline void writeq(__u64 val, volatile void __iomem *addr)
@@ -78,11 +74,12 @@ static inline void writeq(__u64 val, volatile void __iomem *addr)
 	writel(val >> 32, addr+4);
 }
 
+#endif
+
+/* Let people know that we have them */
 #define readq		readq
 #define writeq		writeq
 
-#endif
-
 extern int iommu_bio_merge;
 
 #ifdef CONFIG_X86_32
-- 
cgit v1.2.3-70-g09d2


From 93093d099e5dd0c258fd530c12668e828c20df41 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 30 Nov 2008 10:20:20 +0100
Subject: x86: provide readq()/writeq() on 32-bit too, complete

if HAVE_READQ/HAVE_WRITEQ are defined, the full range of readq/writeq
APIs has to be provided to drivers:

 drivers/infiniband/hw/amso1100/c2.c: In function 'c2_tx_ring_alloc':
 drivers/infiniband/hw/amso1100/c2.c:133: error: implicit declaration of function '__raw_writeq'

So provide them on 32-bit as well. Also, map all the APIs to the
strongest ordering variant. It's way too easy to mess such details
up in drivers and the difference between "memory" and "" constrained
asm() constructs is in the noise range.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/io.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 3ccfaf610c89..33513b9a67f3 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -46,16 +46,11 @@ build_mmio_write(__writel, "l", unsigned int, "r", )
 #define mmiowb() barrier()
 
 #ifdef CONFIG_X86_64
+
 build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
-build_mmio_read(__readq, "q", unsigned long, "=r", )
 build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
-build_mmio_write(__writeq, "q", unsigned long, "r", )
-
-#define readq_relaxed(a) __readq(a)
-#define __raw_readq __readq
-#define __raw_writeq writeq
 
-#else  /* CONFIG_X86_32 from here */
+#else
 
 static inline __u64 readq(const volatile void __iomem *addr)
 {
@@ -76,9 +71,14 @@ static inline void writeq(__u64 val, volatile void __iomem *addr)
 
 #endif
 
+#define readq_relaxed(a)	readq(a)
+
+#define __raw_readq(a)		readq(a)
+#define __raw_writeq(val, addr)	writeq(val, addr)
+
 /* Let people know that we have them */
-#define readq		readq
-#define writeq		writeq
+#define readq			readq
+#define writeq			writeq
 
 extern int iommu_bio_merge;
 
-- 
cgit v1.2.3-70-g09d2


From 50cec5c51c18301ff60262fdbe920f4a907c9d81 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 2 Dec 2008 02:17:15 +0900
Subject: x86: fix dma_mapping_error for 32bit x86, cleanup

This removes ifdef CONFIG_X86_64 in dma_mapping_error():

1) Xen people plan to use swiotlb on X86_32 for Dom0 support. swiotlb
   uses ops->mapping_error so X86_32 also needs to check
   ops->mapping_error.

2) Removing #ifdef hack is almost always a good thing.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/dma-mapping.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 097794ff6b79..dc22c0733282 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -71,12 +71,10 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
 /* Make sure we keep the same behaviour */
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
-#ifdef CONFIG_X86_64
 	struct dma_mapping_ops *ops = get_dma_ops(dev);
 	if (ops->mapping_error)
 		return ops->mapping_error(dev, dma_addr);
 
-#endif
 	return (dma_addr == bad_dma_address);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 8daa19051e1c7369c89ace7b18e74fe1f55dfa29 Mon Sep 17 00:00:00 2001
From: Niels de Vos <niels.devos@wincor-nixdorf.com>
Date: Mon, 1 Dec 2008 14:13:53 -0800
Subject: x86, apm: remove CONFIG_APM_REAL_MODE_POWER_OFF in favor of a kernel
 parameter

Remove CONFIG_APM_REAL_MODE_POWER_OFF like CONFIG_APM_POWER_OFF which
has been done for linux-2.2.14pre8 (http://lkml.org/lkml/1999/11/23/3).

Re-introducing CONFIG_APM_POWER_OFF got nack-ed. Stephen didn't bother
to remove CONFIG_APM_REAL_MODE_POWER_OFF, let's get rid of it now.
	Reference: http://lkml.org/lkml/2008/5/7/97

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig         | 7 -------
 arch/x86/kernel/apm_32.c | 4 ----
 2 files changed, 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4cf0ab13d187..ebcad15ccf35 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1629,13 +1629,6 @@ config APM_ALLOW_INTS
 	  many of the newer IBM Thinkpads.  If you experience hangs when you
 	  suspend, try setting this to Y.  Otherwise, say N.
 
-config APM_REAL_MODE_POWER_OFF
-	bool "Use real mode APM BIOS call to power off"
-	help
-	  Use real mode APM BIOS calls to switch off the computer. This is
-	  a work-around for a number of buggy BIOSes. Switch this option on if
-	  your computer crashes instead of powering off properly.
-
 endif # APM
 
 source "arch/x86/kernel/cpu/cpufreq/Kconfig"
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 5145a6e72bbb..3a26525a3f31 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -391,11 +391,7 @@ static int power_off;
 #else
 static int power_off = 1;
 #endif
-#ifdef CONFIG_APM_REAL_MODE_POWER_OFF
-static int realmode_power_off = 1;
-#else
 static int realmode_power_off;
-#endif
 #ifdef CONFIG_APM_ALLOW_INTS
 static int allow_ints = 1;
 #else
-- 
cgit v1.2.3-70-g09d2


From dcb7731a185efbf3d800618d874af99895df5afb Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 20:16:03 +0100
Subject: x86: fix broken flushing in GART nofullflush path

Impact: remove stale IOTLB entries

In the non-default nofullflush case the GART is only flushed when
next_bit wraps around. But it can happen that an unmap operation unmaps
memory which is behind the current next_bit location. If these addresses
are reused it may result in stale GART IO/TLB entries. Fix this by
setting the GART next_bit always behind an unmapped location.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-gart_64.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index a42b02b4df68..ba7ad83e20a8 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -123,6 +123,8 @@ static void free_iommu(unsigned long offset, int size)
 
 	spin_lock_irqsave(&iommu_bitmap_lock, flags);
 	iommu_area_free(iommu_gart_bitmap, offset, size);
+	if (offset >= next_bit)
+		next_bit = offset + size;
 	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 181de82ee3ffda1175f89d50c991dae31b79280c Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 3 Dec 2008 14:53:04 +0900
Subject: x86: remove dead BIO_VMERGE_BOUNDARY definition

Impact: cleanup, remove dead code

The block layer dropped the virtual merge feature
(b8b3e16cfe6435d961f6aaebcfd52a1ff2a988c5).

BIO_VMERGE_BOUNDARY definition is meaningless now.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/io.h    | 2 --
 arch/x86/include/asm/io_64.h | 2 --
 arch/x86/kernel/pci-dma.c    | 6 ------
 3 files changed, 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 33513b9a67f3..05cfed4485fa 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -80,8 +80,6 @@ static inline void writeq(__u64 val, volatile void __iomem *addr)
 #define readq			readq
 #define writeq			writeq
 
-extern int iommu_bio_merge;
-
 #ifdef CONFIG_X86_32
 # include "io_32.h"
 #else
diff --git a/arch/x86/include/asm/io_64.h b/arch/x86/include/asm/io_64.h
index fea325a1122f..563c16270ba6 100644
--- a/arch/x86/include/asm/io_64.h
+++ b/arch/x86/include/asm/io_64.h
@@ -232,8 +232,6 @@ void memset_io(volatile void __iomem *a, int b, size_t c);
 
 #define flush_write_buffers()
 
-#define BIO_VMERGE_BOUNDARY iommu_bio_merge
-
 /*
  * Convert a virtual cached pointer to an uncached pointer
  */
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 12eeb4bfcdeb..da93c65f8f0b 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -31,11 +31,6 @@ int no_iommu __read_mostly;
 /* Set this to 1 if there is a HW IOMMU in the system */
 int iommu_detected __read_mostly = 0;
 
-/* This tells the BIO block layer to assume merging. Default to off
-   because we cannot guarantee merging later. */
-int iommu_bio_merge __read_mostly = 0;
-EXPORT_SYMBOL(iommu_bio_merge);
-
 dma_addr_t bad_dma_address __read_mostly = 0;
 EXPORT_SYMBOL(bad_dma_address);
 
@@ -189,7 +184,6 @@ static __init int iommu_setup(char *p)
 		}
 
 		if (!strncmp(p, "biomerge", 8)) {
-			iommu_bio_merge = 4096;
 			iommu_merge = 1;
 			force_iommu = 1;
 		}
-- 
cgit v1.2.3-70-g09d2


From affa219b60a11b3295637a97f5b1b8ef231490fc Mon Sep 17 00:00:00 2001
From: Joe Korty <joe.korty@ccur.com>
Date: Wed, 3 Dec 2008 18:58:19 -0500
Subject: x86: change thread_info's flag field back to 32 bits

Impact: pack struct thread_info more tightly

Change x86_64's thread_info 'flags' field back to __u32.

This was changed to 'unsigned long' when the thread_info*.h
for i386 and x86_64 were merged.  Change it back.  We can
do this as only 27 bits of 'flags' are actually used.

This change actually packs down thread_info by 64 bits:
32 bits are saved by the smaller flags, and 32 bits are
saved by the following 'mm_segment_t field' becoming
naturally 64-bit aligned.

Signed-off-by: Joe Korty <joe.korty@ccur.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/thread_info.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e44d379faad2..8dbc57390d25 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -24,7 +24,7 @@ struct exec_domain;
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
 	struct exec_domain	*exec_domain;	/* execution domain */
-	unsigned long		flags;		/* low level flags */
+	__u32			flags;		/* low level flags */
 	__u32			status;		/* thread synchronous flags */
 	__u32			cpu;		/* current CPU */
 	int			preempt_count;	/* 0 => preemptable,
-- 
cgit v1.2.3-70-g09d2


From 55c395b47042e12d5c25aa07f271f56ffe44f793 Mon Sep 17 00:00:00 2001
From: Michael Tokarev <mjt@tls.msk.ru>
Date: Fri, 5 Dec 2008 14:42:20 +0300
Subject: x86: fix missing space in printk

Just come across this when booting on an old hw..
Looks somewhat ugly, that single missing space ;)

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/smpboot.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7b1093397319..1a3c3253f0ed 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1086,8 +1086,10 @@ static int __init smp_sanity_check(unsigned max_cpus)
 #endif
 
 	if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
-		printk(KERN_WARNING "weird, boot CPU (#%d) not listed"
-				    "by the BIOS.\n", hard_smp_processor_id());
+		printk(KERN_WARNING
+			"weird, boot CPU (#%d) not listed by the BIOS.\n",
+			hard_smp_processor_id());
+
 		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
 	}
 
-- 
cgit v1.2.3-70-g09d2


From a0286c94f07636380082608196d41dd725a83229 Mon Sep 17 00:00:00 2001
From: Michael Tokarev <mjt@tls.msk.ru>
Date: Fri, 5 Dec 2008 15:47:29 +0300
Subject: x86: fix missing space in printk, #2

Impact: clean up printk

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-dma.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 192624820217..dc572994703d 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -300,8 +300,8 @@ fs_initcall(pci_iommu_init);
 static __devinit void via_no_dac(struct pci_dev *dev)
 {
 	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
-		printk(KERN_INFO "PCI: VIA PCI bridge detected."
-				 "Disabling DAC.\n");
+		printk(KERN_INFO
+			"PCI: VIA PCI bridge detected. Disabling DAC.\n");
 		forbid_dac = 1;
 	}
 }
-- 
cgit v1.2.3-70-g09d2


From 3e1e9002aa8b32bd4c95ac6c8fad376b7a8127fb Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 8 Dec 2008 00:50:22 +0100
Subject: x86: change static allocation of trampoline area

Impact: fix trampoline sizing bug, save space

While debugging a suspend-to-RAM related issue it occured to me that
if the trampoline code had grown past 4 KB, we would have been
allocating too little memory for it, since the 4 KB size of the
trampoline is hardcoded into arch/x86/kernel/e820.c .  Change that
by making the kernel compute the trampoline size and allocate as much
memory as necessary.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/trampoline.h |  7 +++++++
 arch/x86/kernel/e820.c            | 16 ----------------
 arch/x86/kernel/head32.c          |  3 +++
 arch/x86/kernel/head64.c          |  3 +++
 arch/x86/kernel/trampoline.c      | 19 +++++++++++++++++--
 5 files changed, 30 insertions(+), 18 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
index fa0d79facdbc..780ba0ab94f9 100644
--- a/arch/x86/include/asm/trampoline.h
+++ b/arch/x86/include/asm/trampoline.h
@@ -3,6 +3,7 @@
 
 #ifndef __ASSEMBLY__
 
+#ifdef CONFIG_X86_TRAMPOLINE
 /*
  * Trampoline 80x86 program as an array.
  */
@@ -13,8 +14,14 @@ extern unsigned char *trampoline_base;
 extern unsigned long init_rsp;
 extern unsigned long initial_code;
 
+#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
 #define TRAMPOLINE_BASE 0x6000
+
 extern unsigned long setup_trampoline(void);
+extern void __init reserve_trampoline_memory(void);
+#else
+static inline void reserve_trampoline_memory(void) {};
+#endif /* CONFIG_X86_TRAMPOLINE */
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 7aafeb5263ef..65a13943e098 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -677,22 +677,6 @@ struct early_res {
 };
 static struct early_res early_res[MAX_EARLY_RES] __initdata = {
 	{ 0, PAGE_SIZE, "BIOS data page" },	/* BIOS data page */
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE)
-	{ TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
-#endif
-#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
-	/*
-	 * But first pinch a few for the stack/trampoline stuff
-	 * FIXME: Don't need the extra page at 4K, but need to fix
-	 * trampoline before removing it. (see the GDT stuff)
-	 */
-	{ PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" },
-	/*
-	 * Has to be in very low memory so we can execute
-	 * real-mode AP code.
-	 */
-	{ TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" },
-#endif
 	{}
 };
 
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index fa1d25dd83e3..ac108d1fe182 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -12,9 +12,12 @@
 #include <asm/sections.h>
 #include <asm/e820.h>
 #include <asm/bios_ebda.h>
+#include <asm/trampoline.h>
 
 void __init i386_start_kernel(void)
 {
+	reserve_trampoline_memory();
+
 	reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index d16084f90649..388e05a5fc17 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -24,6 +24,7 @@
 #include <asm/kdebug.h>
 #include <asm/e820.h>
 #include <asm/bios_ebda.h>
+#include <asm/trampoline.h>
 
 /* boot cpu pda */
 static struct x8664_pda _boot_cpu_pda __read_mostly;
@@ -120,6 +121,8 @@ void __init x86_64_start_reservations(char *real_mode_data)
 {
 	copy_bootdata(__va(real_mode_data));
 
+	reserve_trampoline_memory();
+
 	reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index 1106fac6024d..808031a5ba19 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -1,10 +1,26 @@
 #include <linux/io.h>
 
 #include <asm/trampoline.h>
+#include <asm/e820.h>
 
 /* ready for x86_64 and x86 */
 unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
 
+void __init reserve_trampoline_memory(void)
+{
+#ifdef CONFIG_X86_32
+	/*
+	 * But first pinch a few for the stack/trampoline stuff
+	 * FIXME: Don't need the extra page at 4K, but need to fix
+	 * trampoline before removing it. (see the GDT stuff)
+	 */
+	reserve_early(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE");
+#endif
+	/* Has to be in very low memory so we can execute real-mode AP code. */
+	reserve_early(TRAMPOLINE_BASE, TRAMPOLINE_BASE + TRAMPOLINE_SIZE,
+			"TRAMPOLINE");
+}
+
 /*
  * Currently trivial. Write the real->protected mode
  * bootstrap into the page concerned. The caller
@@ -12,7 +28,6 @@ unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
  */
 unsigned long setup_trampoline(void)
 {
-	memcpy(trampoline_base, trampoline_data,
-	       trampoline_end - trampoline_data);
+	memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
 	return virt_to_phys(trampoline_base);
 }
-- 
cgit v1.2.3-70-g09d2


From 69b88afa8d114a43a3c0431722b79e31d9920692 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 5 Dec 2008 22:45:50 -0800
Subject: x86: clean up get_smp_config()

Impact: cleanup

reorder exit path in __get_smp_config().

also move two print outs to acpi_process_madt

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c | 11 +++++++++++
 arch/x86/kernel/mpparse.c   | 25 +++++++++++--------------
 2 files changed, 22 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 4c51a2f8fd31..65d0b72777ea 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1360,6 +1360,17 @@ static void __init acpi_process_madt(void)
 			disable_acpi();
 		}
 	}
+
+	/*
+	 * ACPI supports both logical (e.g. Hyper-Threading) and physical
+	 * processors, where MPS only supports physical.
+	 */
+	if (acpi_lapic && acpi_ioapic)
+		printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
+		       "information\n");
+	else if (acpi_lapic)
+		printk(KERN_INFO "Using ACPI for processor (LAPIC) "
+		       "configuration information\n");
 #endif
 	return;
 }
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 0f4c1fd5a1f4..45e3b69808ba 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -586,26 +586,23 @@ static void __init __get_smp_config(unsigned int early)
 {
 	struct intel_mp_floating *mpf = mpf_found;
 
-	if (x86_quirks->mach_get_smp_config) {
-		if (x86_quirks->mach_get_smp_config(early))
-			return;
-	}
+	if (!mpf)
+		return;
+
 	if (acpi_lapic && early)
 		return;
+
 	/*
-	 * ACPI supports both logical (e.g. Hyper-Threading) and physical
-	 * processors, where MPS only supports physical.
+	 * MPS doesn't support hyperthreading, aka only have
+	 * thread 0 apic id in MPS table
 	 */
-	if (acpi_lapic && acpi_ioapic) {
-		printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
-		       "information\n");
+	if (acpi_lapic && acpi_ioapic)
 		return;
-	} else if (acpi_lapic)
-		printk(KERN_INFO "Using ACPI for processor (LAPIC) "
-		       "configuration information\n");
 
-	if (!mpf)
-		return;
+	if (x86_quirks->mach_get_smp_config) {
+		if (x86_quirks->mach_get_smp_config(early))
+			return;
+	}
 
 	printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
 	       mpf->mpf_specification);
-- 
cgit v1.2.3-70-g09d2


From 4217458dafaa57d8e26a46f5d05ab8c53cf64191 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Fri, 5 Dec 2008 17:17:09 -0800
Subject: x86: signal: change type of paramter for sys_rt_sigreturn()

Impact: cleanup on 32-bit

Peter pointed this parameter can be changed.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/syscalls.h | 2 +-
 arch/x86/kernel/signal.c        | 6 ++----
 2 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 87803da44010..3a5252c4b8d6 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -33,7 +33,7 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
 			     struct old_sigaction __user *);
 asmlinkage int sys_sigaltstack(unsigned long);
 asmlinkage unsigned long sys_sigreturn(unsigned long);
-asmlinkage int sys_rt_sigreturn(unsigned long);
+asmlinkage int sys_rt_sigreturn(struct pt_regs);
 
 /* kernel/ioport.c */
 asmlinkage long sys_iopl(unsigned long);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b1f4d34e0a38..b1cc6da64208 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -642,11 +642,9 @@ badframe:
 }
 
 #ifdef CONFIG_X86_32
-asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+asmlinkage int sys_rt_sigreturn(struct pt_regs regs)
 {
-	struct pt_regs *regs = (struct pt_regs *)&__unused;
-
-	return do_rt_sigreturn(regs);
+	return do_rt_sigreturn(&regs);
 }
 #else /* !CONFIG_X86_32 */
 asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
-- 
cgit v1.2.3-70-g09d2


From 8808500f26a61757cb414da76b271bbd09d5958c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 12 Dec 2008 09:20:12 +0100
Subject: x86: soften multi-BAR mapping sanity check warning message

Impact: make debug warning less scary

The ioremap() time multi-BAR map warning has been causing false
positives:

  http://lkml.org/lkml/2008/12/10/432
  http://lkml.org/lkml/2008/12/11/136

So make it less scary by making it once-per-boot, by making it KERN_INFO
and by adding this text:

  "Info: mapping multiple BARs. Your kernel is fine."

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/ioremap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d4c4307ff3e0..bd85d42819e1 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -223,7 +223,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 	 * Check if the request spans more than any BAR in the iomem resource
 	 * tree.
 	 */
-	WARN_ON(iomem_map_sanity_check(phys_addr, size));
+	WARN_ONCE(iomem_map_sanity_check(phys_addr, size),
+		  KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
 
 	/*
 	 * Don't allow anybody to remap normal RAM that we're using..
-- 
cgit v1.2.3-70-g09d2


From 85072bd55219231b8ca5d9d3fa3492eb4fa6635f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 12 Dec 2008 11:08:42 +0100
Subject: x86, debug: remove EBDA debug printk

Remove leftover EBDA debug message.

Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/head.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index 1dcb0f13897e..3e66bd364a9d 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -35,7 +35,6 @@ void __init reserve_ebda_region(void)
 
 	/* start of EBDA area */
 	ebda_addr = get_bios_ebda();
-	printk(KERN_INFO "BIOS EBDA/lowmem at: %08x/%08x\n", ebda_addr, lowmem);
 
 	/* Fixup: bios puts an EBDA in the top 64K segment */
 	/* of conventional memory, but does not adjust lowmem. */
-- 
cgit v1.2.3-70-g09d2


From 16855f878d7127a8bb3925753463485f3071ad76 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 8 Dec 2008 19:18:38 -0800
Subject: x86: uaccess: return value of __{get|put}_user() can be int

Impact: cleanup

The type of return value of __{get|put}_user() can be int.
There is no user to refer the return value of __{get|put}_user() as long.
This reduces code size a bit on 64-bit.

 $ size vmlinux.*
     text	   data	    bss	    dec	    hex	filename
  4509265	 479988	 673588	5662841	 566879	vmlinux.new
  4511462	 479988	 673588	5665038	 56710e	vmlinux.old

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/uaccess.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 35c54921b2e4..580c3ee6c58c 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -350,14 +350,14 @@ do {									\
 
 #define __put_user_nocheck(x, ptr, size)			\
 ({								\
-	long __pu_err;						\
+	int __pu_err;						\
 	__put_user_size((x), (ptr), (size), __pu_err, -EFAULT);	\
 	__pu_err;						\
 })
 
 #define __get_user_nocheck(x, ptr, size)				\
 ({									\
-	long __gu_err;							\
+	int __gu_err;							\
 	unsigned long __gu_val;						\
 	__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT);	\
 	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
-- 
cgit v1.2.3-70-g09d2


From 8f2466f45f75e3cbe3aa2b69d33fd9d6e343b9cc Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 8 Dec 2008 19:19:07 -0800
Subject: x86: kill #ifdef for exit_idle()

Impact: cleanup

Introduce helper inline function in arch/x86/include/asm/idle.h
to remove #ifdefs around exit_idle().

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/idle.h | 5 +++++
 arch/x86/kernel/apic.c      | 6 ------
 arch/x86/kernel/io_apic.c   | 3 +--
 3 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h
index 44c89c3a23e9..38d87379e270 100644
--- a/arch/x86/include/asm/idle.h
+++ b/arch/x86/include/asm/idle.h
@@ -8,8 +8,13 @@ struct notifier_block;
 void idle_notifier_register(struct notifier_block *n);
 void idle_notifier_unregister(struct notifier_block *n);
 
+#ifdef CONFIG_X86_64
 void enter_idle(void);
 void exit_idle(void);
+#else /* !CONFIG_X86_64 */
+static inline void enter_idle(void) { }
+static inline void exit_idle(void) { }
+#endif /* CONFIG_X86_64 */
 
 void c1e_remove_cpu(int cpu);
 
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 16f94879b525..0fd083713f62 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -814,9 +814,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
 	 * Besides, if we don't timer interrupts ignore the global
 	 * interrupt lock, which is the WrongThing (tm) to do.
 	 */
-#ifdef CONFIG_X86_64
 	exit_idle();
-#endif
 	irq_enter();
 	local_apic_timer_interrupt();
 	irq_exit();
@@ -1682,9 +1680,7 @@ void smp_spurious_interrupt(struct pt_regs *regs)
 {
 	u32 v;
 
-#ifdef CONFIG_X86_64
 	exit_idle();
-#endif
 	irq_enter();
 	/*
 	 * Check if this really is a spurious interrupt and ACK it
@@ -1713,9 +1709,7 @@ void smp_error_interrupt(struct pt_regs *regs)
 {
 	u32 v, v1;
 
-#ifdef CONFIG_X86_64
 	exit_idle();
-#endif
 	irq_enter();
 	/* First tickle the hardware, only then report what went on. -- REW */
 	v = apic_read(APIC_ESR);
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9043251210fb..679e7bbbbcd6 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -2216,10 +2216,9 @@ static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 asmlinkage void smp_irq_move_cleanup_interrupt(void)
 {
 	unsigned vector, me;
+
 	ack_APIC_irq();
-#ifdef CONFIG_X86_64
 	exit_idle();
-#endif
 	irq_enter();
 
 	me = smp_processor_id();
-- 
cgit v1.2.3-70-g09d2


From 915b0d0104b72fd36af088ba4b11b5690bc96a6c Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 8 Dec 2008 19:19:26 -0800
Subject: x86: hardirq: introduce inc_irq_stat()

Impact: cleanup

Introduce inc_irq_stat() macro and unify irq_stat accounting code.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/hardirq_32.h |  2 ++
 arch/x86/include/asm/hardirq_64.h |  2 ++
 arch/x86/kernel/apic.c            | 13 +++----------
 arch/x86/kernel/smp.c             | 18 +++---------------
 arch/x86/kernel/traps.c           |  6 +-----
 5 files changed, 11 insertions(+), 30 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h
index 5ca135e72f2b..cf7954d1405f 100644
--- a/arch/x86/include/asm/hardirq_32.h
+++ b/arch/x86/include/asm/hardirq_32.h
@@ -22,6 +22,8 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
 #define __ARCH_IRQ_STAT
 #define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member)
 
+#define inc_irq_stat(member)	(__get_cpu_var(irq_stat).member++)
+
 void ack_bad_irq(unsigned int irq);
 #include <linux/irq_cpustat.h>
 
diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h
index 1ba381fc51d3..b5a6b5d56704 100644
--- a/arch/x86/include/asm/hardirq_64.h
+++ b/arch/x86/include/asm/hardirq_64.h
@@ -11,6 +11,8 @@
 
 #define __ARCH_IRQ_STAT 1
 
+#define inc_irq_stat(member)	add_pda(member, 1)
+
 #define local_softirq_pending() read_pda(__softirq_pending)
 
 #define __ARCH_SET_SOFTIRQ_PENDING 1
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 16f94879b525..1771dd746811 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -783,11 +783,7 @@ static void local_apic_timer_interrupt(void)
 	/*
 	 * the NMI deadlock-detector uses this.
 	 */
-#ifdef CONFIG_X86_64
-	add_pda(apic_timer_irqs, 1);
-#else
-	per_cpu(irq_stat, cpu).apic_timer_irqs++;
-#endif
+	inc_irq_stat(apic_timer_irqs);
 
 	evt->event_handler(evt);
 }
@@ -1695,14 +1691,11 @@ void smp_spurious_interrupt(struct pt_regs *regs)
 	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
 		ack_APIC_irq();
 
-#ifdef CONFIG_X86_64
-	add_pda(irq_spurious_count, 1);
-#else
+	inc_irq_stat(irq_spurious_count);
+
 	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
 	printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
 	       "should never happen.\n", smp_processor_id());
-	__get_cpu_var(irq_stat).irq_spurious_count++;
-#endif
 	irq_exit();
 }
 
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 18f9b19f5f8f..d18537ce2c79 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -178,11 +178,7 @@ static void native_smp_send_stop(void)
 void smp_reschedule_interrupt(struct pt_regs *regs)
 {
 	ack_APIC_irq();
-#ifdef CONFIG_X86_32
-	__get_cpu_var(irq_stat).irq_resched_count++;
-#else
-	add_pda(irq_resched_count, 1);
-#endif
+	inc_irq_stat(irq_resched_count);
 }
 
 void smp_call_function_interrupt(struct pt_regs *regs)
@@ -190,11 +186,7 @@ void smp_call_function_interrupt(struct pt_regs *regs)
 	ack_APIC_irq();
 	irq_enter();
 	generic_smp_call_function_interrupt();
-#ifdef CONFIG_X86_32
-	__get_cpu_var(irq_stat).irq_call_count++;
-#else
-	add_pda(irq_call_count, 1);
-#endif
+	inc_irq_stat(irq_call_count);
 	irq_exit();
 }
 
@@ -203,11 +195,7 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
 	ack_APIC_irq();
 	irq_enter();
 	generic_smp_call_function_single_interrupt();
-#ifdef CONFIG_X86_32
-	__get_cpu_var(irq_stat).irq_call_count++;
-#else
-	add_pda(irq_call_count, 1);
-#endif
+	inc_irq_stat(irq_call_count);
 	irq_exit();
 }
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 04d242ab0161..d815293e6d94 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -481,11 +481,7 @@ do_nmi(struct pt_regs *regs, long error_code)
 {
 	nmi_enter();
 
-#ifdef CONFIG_X86_32
-	{ int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); }
-#else
-	add_pda(__nmi_count, 1);
-#endif
+	inc_irq_stat(__nmi_count);
 
 	if (!ignore_nmis)
 		default_do_nmi(regs);
-- 
cgit v1.2.3-70-g09d2


From 9470565579f29486f4ed0ffa50774268b64994b0 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Mon, 1 Dec 2008 14:13:50 -0800
Subject: x86: remove init_mm export as planned for 2.6.26

Impact: remove deprecated export

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/feature-removal-schedule.txt | 12 ------------
 arch/x86/kernel/init_task.c                |  1 -
 2 files changed, 13 deletions(-)

(limited to 'arch/x86')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index c28a2ac88f9d..1a8af7354e79 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -244,18 +244,6 @@ Who:	Michael Buesch <mb@bu3sch.de>
 
 ---------------------------
 
-What:	init_mm export
-When:	2.6.26
-Why:	Not used in-tree. The current out-of-tree users used it to
-	work around problems in the CPA code which should be resolved
-	by now. One usecase was described to provide verification code
-	of the CPA operation. That's a good idea in general, but such
-	code / infrastructure should be in the kernel and not in some
-	out-of-tree driver.
-Who:	Thomas Gleixner <tglx@linutronix.de>
-
-----------------------------
-
 What:	usedac i386 kernel parameter
 When:	2.6.27
 Why:	replaced by allowdac and no dac combination
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index a4f93b4120c1..d39918076bb4 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -14,7 +14,6 @@ static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
-EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */
 
 /*
  * Initial thread structure.
-- 
cgit v1.2.3-70-g09d2


From fd28a5b58dddf5cb5df162ae5c8797a63171c31d Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Tue, 21 Oct 2008 14:05:00 +0200
Subject: x86: remove simnow earlyprintk support

Impact: remove obsolete code

The later versions of SimNow! actually all have serial console
emulation, so the direct interface isn't needed anymore. So remove
the undocumented simnow earlyprintk console.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/early_printk.c | 47 ------------------------------------------
 1 file changed, 47 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 34ad997d3834..23b138e31e9c 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -875,49 +875,6 @@ static struct console early_dbgp_console = {
 };
 #endif
 
-/* Console interface to a host file on AMD's SimNow! */
-
-static int simnow_fd;
-
-enum {
-	MAGIC1 = 0xBACCD00A,
-	MAGIC2 = 0xCA110000,
-	XOPEN = 5,
-	XWRITE = 4,
-};
-
-static noinline long simnow(long cmd, long a, long b, long c)
-{
-	long ret;
-
-	asm volatile("cpuid" :
-		     "=a" (ret) :
-		     "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2));
-	return ret;
-}
-
-static void __init simnow_init(char *str)
-{
-	char *fn = "klog";
-
-	if (*str == '=')
-		fn = ++str;
-	/* error ignored */
-	simnow_fd = simnow(XOPEN, (unsigned long)fn, O_WRONLY|O_APPEND|O_CREAT, 0644);
-}
-
-static void simnow_write(struct console *con, const char *s, unsigned n)
-{
-	simnow(XWRITE, simnow_fd, (unsigned long)s, n);
-}
-
-static struct console simnow_console = {
-	.name =		"simnow",
-	.write =	simnow_write,
-	.flags =	CON_PRINTBUFFER,
-	.index =	-1,
-};
-
 /* Direct interface for emergencies */
 static struct console *early_console = &early_vga_console;
 static int __initdata early_console_initialized;
@@ -960,10 +917,6 @@ static int __init setup_early_printk(char *buf)
 		max_ypos = boot_params.screen_info.orig_video_lines;
 		current_ypos = boot_params.screen_info.orig_y;
 		early_console = &early_vga_console;
-	} else if (!strncmp(buf, "simnow", 6)) {
-		simnow_init(buf + 6);
-		early_console = &simnow_console;
-		keep_early = 1;
 #ifdef CONFIG_EARLY_PRINTK_DBGP
 	} else if (!strncmp(buf, "dbgp", 4)) {
 		if (early_dbgp_init(buf+4) < 0)
-- 
cgit v1.2.3-70-g09d2


From 205516c12dbba003c26b42cfb41e598631300106 Mon Sep 17 00:00:00 2001
From: Ken Chen <kenchen@google.com>
Date: Tue, 16 Dec 2008 00:32:21 -0800
Subject: x86: convert rdtscll() to use __native_read_tsc

Impact: micro-optimization

Is there any reason why x86 rdtscll have to use the out of line
function instead of inline __native_read_tsc()?  native_read_tsc and
__native_read_tsc is essentially the same functions.

Patch to let x86 rdtscll() to use the inline version of read_tsc.

Signed-off-by: Ken Chen <kenchen@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/msr.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index c2a812ebde89..42f639b991b4 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -181,10 +181,10 @@ static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
 }
 
 #define rdtscl(low)						\
-	((low) = (u32)native_read_tsc())
+	((low) = (u32)__native_read_tsc())
 
 #define rdtscll(val)						\
-	((val) = native_read_tsc())
+	((val) = __native_read_tsc())
 
 #define rdpmc(counter, low, high)			\
 do {							\
-- 
cgit v1.2.3-70-g09d2


From 1796316a8b028a148be48ba5d4e7be493a39d173 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Tue, 16 Dec 2008 11:35:24 +0000
Subject: x86: consolidate __swp_XXX() macros

Impact: cleanup, code robustization

The __swp_...() macros silently relied upon which bits are used for
_PAGE_FILE and _PAGE_PROTNONE. After having changed _PAGE_PROTNONE in
our Xen kernel to no longer overlap _PAGE_PAT, live locks and crashes
were reported that could have been avoided if these macros properly
used the symbolic constants. Since, as pointed out earlier, for Xen
Dom0 support mainline likewise will need to eliminate the conflict
between _PAGE_PAT and _PAGE_PROTNONE, this patch does all the necessary
adjustments, plus it introduces a mechanism to check consistency
between MAX_SWAPFILES_SHIFT and the actual encoding macros.

This also fixes a latent bug in that x86-64 used a 6-bit mask in
__swp_type(), and if MAX_SWAPFILES_SHIFT was increased beyond 5 in (the
seemingly unrelated) linux/swap.h, this would have resulted in a
collision with _PAGE_FILE.

Non-PAE 32-bit code gets similarly adjusted for its pte_to_pgoff() and
pgoff_to_pte() calculations.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/pgtable-2level.h | 50 ++++++++++++++++++++++++++++-------
 arch/x86/include/asm/pgtable-3level.h |  1 +
 arch/x86/include/asm/pgtable.h        | 14 +++++-----
 arch/x86/include/asm/pgtable_64.h     | 20 +++++++++++---
 mm/swapfile.c                         |  9 +++++++
 5 files changed, 75 insertions(+), 19 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index b17edfd23628..e0d199fe1d83 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -56,23 +56,55 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp)
 #define pte_none(x)		(!(x).pte_low)
 
 /*
- * Bits 0, 6 and 7 are taken, split up the 29 bits of offset
- * into this range:
+ * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken,
+ * split up the 29 bits of offset into this range:
  */
 #define PTE_FILE_MAX_BITS	29
+#define PTE_FILE_SHIFT1		(_PAGE_BIT_PRESENT + 1)
+#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
+#define PTE_FILE_SHIFT2		(_PAGE_BIT_FILE + 1)
+#define PTE_FILE_SHIFT3		(_PAGE_BIT_PROTNONE + 1)
+#else
+#define PTE_FILE_SHIFT2		(_PAGE_BIT_PROTNONE + 1)
+#define PTE_FILE_SHIFT3		(_PAGE_BIT_FILE + 1)
+#endif
+#define PTE_FILE_BITS1		(PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
+#define PTE_FILE_BITS2		(PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
 
 #define pte_to_pgoff(pte)						\
-	((((pte).pte_low >> 1) & 0x1f) + (((pte).pte_low >> 8) << 5))
+	((((pte).pte_low >> PTE_FILE_SHIFT1)				\
+	  & ((1U << PTE_FILE_BITS1) - 1))				\
+	 + ((((pte).pte_low >> PTE_FILE_SHIFT2)				\
+	     & ((1U << PTE_FILE_BITS2) - 1)) << PTE_FILE_BITS1)		\
+	 + (((pte).pte_low >> PTE_FILE_SHIFT3)				\
+	    << (PTE_FILE_BITS1 + PTE_FILE_BITS2)))
 
 #define pgoff_to_pte(off)						\
-	((pte_t) { .pte_low = (((off) & 0x1f) << 1) +			\
-			(((off) >> 5) << 8) + _PAGE_FILE })
+	((pte_t) { .pte_low =						\
+	 (((off) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1)	\
+	 + ((((off) >> PTE_FILE_BITS1) & ((1U << PTE_FILE_BITS2) - 1))	\
+	    << PTE_FILE_SHIFT2)						\
+	 + (((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2))		\
+	    << PTE_FILE_SHIFT3)						\
+	 + _PAGE_FILE })
 
 /* Encode and de-code a swap entry */
-#define __swp_type(x)			(((x).val >> 1) & 0x1f)
-#define __swp_offset(x)			((x).val >> 8)
-#define __swp_entry(type, offset)				\
-	((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
+#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
+#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
+#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
+#else
+#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1)
+#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1)
+#endif
+
+#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
+
+#define __swp_type(x)			(((x).val >> (_PAGE_BIT_PRESENT + 1)) \
+					 & ((1U << SWP_TYPE_BITS) - 1))
+#define __swp_offset(x)			((x).val >> SWP_OFFSET_SHIFT)
+#define __swp_entry(type, offset)	((swp_entry_t) { \
+					 ((type) << (_PAGE_BIT_PRESENT + 1)) \
+					 | ((offset) << SWP_OFFSET_SHIFT) })
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { (pte).pte_low })
 #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
 
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 52597aeadfff..447da43cddb3 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -166,6 +166,7 @@ static inline int pte_none(pte_t pte)
 #define PTE_FILE_MAX_BITS       32
 
 /* Encode and de-code a swap entry */
+#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
 #define __swp_type(x)			(((x).val) & 0x1f)
 #define __swp_offset(x)			((x).val >> 5)
 #define __swp_entry(type, offset)	((swp_entry_t){(type) | (offset) << 5})
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index c012f3b11671..b7c2ecdb7658 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -10,7 +10,6 @@
 #define _PAGE_BIT_PCD		4	/* page cache disabled */
 #define _PAGE_BIT_ACCESSED	5	/* was accessed (raised by CPU) */
 #define _PAGE_BIT_DIRTY		6	/* was written to (raised by CPU) */
-#define _PAGE_BIT_FILE		6
 #define _PAGE_BIT_PSE		7	/* 4 MB (or 2MB) page */
 #define _PAGE_BIT_PAT		7	/* on 4KB pages */
 #define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
@@ -22,6 +21,12 @@
 #define _PAGE_BIT_CPA_TEST	_PAGE_BIT_UNUSED1
 #define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */
 
+/* If _PAGE_BIT_PRESENT is clear, we use these: */
+/* - if the user mapped it with PROT_NONE; pte_present gives true */
+#define _PAGE_BIT_PROTNONE	_PAGE_BIT_GLOBAL
+/* - set: nonlinear file mapping, saved PTE; unset:swap */
+#define _PAGE_BIT_FILE		_PAGE_BIT_DIRTY
+
 #define _PAGE_PRESENT	(_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
 #define _PAGE_RW	(_AT(pteval_t, 1) << _PAGE_BIT_RW)
 #define _PAGE_USER	(_AT(pteval_t, 1) << _PAGE_BIT_USER)
@@ -46,11 +51,8 @@
 #define _PAGE_NX	(_AT(pteval_t, 0))
 #endif
 
-/* If _PAGE_PRESENT is clear, we use these: */
-#define _PAGE_FILE	_PAGE_DIRTY	/* nonlinear file mapping,
-					 * saved PTE; unset:swap */
-#define _PAGE_PROTNONE	_PAGE_PSE	/* if the user mapped it with PROT_NONE;
-					   pte_present gives true */
+#define _PAGE_FILE	(_AT(pteval_t, 1) << _PAGE_BIT_FILE)
+#define _PAGE_PROTNONE	(_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |	\
 			 _PAGE_ACCESSED | _PAGE_DIRTY)
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 545a0e042bb2..65b6be6677c7 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -250,10 +250,22 @@ static inline int pud_large(pud_t pte)
 extern int direct_gbpages;
 
 /* Encode and de-code a swap entry */
-#define __swp_type(x)			(((x).val >> 1) & 0x3f)
-#define __swp_offset(x)			((x).val >> 8)
-#define __swp_entry(type, offset)	((swp_entry_t) { ((type) << 1) | \
-							 ((offset) << 8) })
+#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
+#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
+#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
+#else
+#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1)
+#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1)
+#endif
+
+#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
+
+#define __swp_type(x)			(((x).val >> (_PAGE_BIT_PRESENT + 1)) \
+					 & ((1U << SWP_TYPE_BITS) - 1))
+#define __swp_offset(x)			((x).val >> SWP_OFFSET_SHIFT)
+#define __swp_entry(type, offset)	((swp_entry_t) { \
+					 ((type) << (_PAGE_BIT_PRESENT + 1)) \
+					 | ((offset) << SWP_OFFSET_SHIFT) })
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
 #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 90cb67a5417c..54a9f87e5162 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1462,6 +1462,15 @@ static int __init procswaps_init(void)
 __initcall(procswaps_init);
 #endif /* CONFIG_PROC_FS */
 
+#ifdef MAX_SWAPFILES_CHECK
+static int __init max_swapfiles_check(void)
+{
+	MAX_SWAPFILES_CHECK();
+	return 0;
+}
+late_initcall(max_swapfiles_check);
+#endif
+
 /*
  * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
  *
-- 
cgit v1.2.3-70-g09d2


From b93a531e315e97ef00367099e6b5f19651936e20 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Tue, 16 Dec 2008 11:40:27 +0000
Subject: allow bug table entries to use relative pointers (and use it on
 x86-64)

Impact: reduce bug table size

This allows reducing the bug table size by half. Perhaps there are
other 64-bit architectures that could also make use of this.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig           |  4 ++++
 arch/x86/include/asm/bug.h |  2 +-
 include/asm-generic/bug.h  |  8 ++++++++
 lib/bug.c                  | 19 +++++++++++++++++--
 4 files changed, 30 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac22bb7719f7..ab98cca84e1b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -87,6 +87,10 @@ config GENERIC_IOMAP
 config GENERIC_BUG
 	def_bool y
 	depends on BUG
+	select GENERIC_BUG_RELATIVE_POINTERS if X86_64
+
+config GENERIC_BUG_RELATIVE_POINTERS
+	bool
 
 config GENERIC_HWEIGHT
 	def_bool y
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 3def2065fcea..d9cf1cd156d2 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -9,7 +9,7 @@
 #ifdef CONFIG_X86_32
 # define __BUG_C0	"2:\t.long 1b, %c0\n"
 #else
-# define __BUG_C0	"2:\t.quad 1b, %c0\n"
+# define __BUG_C0	"2:\t.long 1b - 2b, %c0 - 2b\n"
 #endif
 
 #define BUG()							\
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 12c07c1866b2..4c794d73fb84 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -8,9 +8,17 @@
 #ifdef CONFIG_GENERIC_BUG
 #ifndef __ASSEMBLY__
 struct bug_entry {
+#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
 	unsigned long	bug_addr;
+#else
+	signed int	bug_addr_disp;
+#endif
 #ifdef CONFIG_DEBUG_BUGVERBOSE
+#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
 	const char	*file;
+#else
+	signed int	file_disp;
+#endif
 	unsigned short	line;
 #endif
 	unsigned short	flags;
diff --git a/lib/bug.c b/lib/bug.c
index bfeafd60ee9f..300e41afbf97 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -5,6 +5,8 @@
 
   CONFIG_BUG - emit BUG traps.  Nothing happens without this.
   CONFIG_GENERIC_BUG - enable this code.
+  CONFIG_GENERIC_BUG_RELATIVE_POINTERS - use 32-bit pointers relative to
+	the containing struct bug_entry for bug_addr and file.
   CONFIG_DEBUG_BUGVERBOSE - emit full file+line information for each BUG
 
   CONFIG_BUG and CONFIG_DEBUG_BUGVERBOSE are potentially user-settable
@@ -43,6 +45,15 @@
 
 extern const struct bug_entry __start___bug_table[], __stop___bug_table[];
 
+static inline unsigned long bug_addr(const struct bug_entry *bug)
+{
+#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
+	return bug->bug_addr;
+#else
+	return (unsigned long)bug + bug->bug_addr_disp;
+#endif
+}
+
 #ifdef CONFIG_MODULES
 static LIST_HEAD(module_bug_list);
 
@@ -55,7 +66,7 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr)
 		unsigned i;
 
 		for (i = 0; i < mod->num_bugs; ++i, ++bug)
-			if (bugaddr == bug->bug_addr)
+			if (bugaddr == bug_addr(bug))
 				return bug;
 	}
 	return NULL;
@@ -108,7 +119,7 @@ const struct bug_entry *find_bug(unsigned long bugaddr)
 	const struct bug_entry *bug;
 
 	for (bug = __start___bug_table; bug < __stop___bug_table; ++bug)
-		if (bugaddr == bug->bug_addr)
+		if (bugaddr == bug_addr(bug))
 			return bug;
 
 	return module_find_bug(bugaddr);
@@ -133,7 +144,11 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
 
 	if (bug) {
 #ifdef CONFIG_DEBUG_BUGVERBOSE
+#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
 		file = bug->file;
+#else
+		file = (const char *)bug + bug->file_disp;
+#endif
 		line = bug->line;
 #endif
 		warning = (bug->flags & BUGFLAG_WARNING) != 0;
-- 
cgit v1.2.3-70-g09d2


From d6be89ad660c5d03edef91715093d447025df59b Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Tue, 16 Dec 2008 11:42:45 +0000
Subject: x86, 32-bit: simplify alloc_low_page()

Impact: cleanup

Neither of the callers really needs the physical address this function
returns, so eliminate the pointless argument.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_32.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 3ffed259883e..333c9e79d46f 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -67,7 +67,7 @@ static unsigned long __meminitdata table_top;
 
 static int __initdata after_init_bootmem;
 
-static __init void *alloc_low_page(unsigned long *phys)
+static __init void *alloc_low_page(void)
 {
 	unsigned long pfn = table_end++;
 	void *adr;
@@ -77,7 +77,6 @@ static __init void *alloc_low_page(unsigned long *phys)
 
 	adr = __va(pfn * PAGE_SIZE);
 	memset(adr, 0, PAGE_SIZE);
-	*phys  = pfn * PAGE_SIZE;
 	return adr;
 }
 
@@ -92,12 +91,11 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
 	pmd_t *pmd_table;
 
 #ifdef CONFIG_X86_PAE
-	unsigned long phys;
 	if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
 		if (after_init_bootmem)
 			pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
 		else
-			pmd_table = (pmd_t *)alloc_low_page(&phys);
+			pmd_table = (pmd_t *)alloc_low_page();
 		paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
 		set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
 		pud = pud_offset(pgd, 0);
@@ -128,10 +126,8 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
 			if (!page_table)
 				page_table =
 				(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
-		} else {
-			unsigned long phys;
-			page_table = (pte_t *)alloc_low_page(&phys);
-		}
+		} else
+			page_table = (pte_t *)alloc_low_page();
 
 		paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
 		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
-- 
cgit v1.2.3-70-g09d2


From beeb4195cbc80b7489631361b7ed38b7518af433 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Tue, 16 Dec 2008 11:45:56 +0000
Subject: x86, 32-bit: add some compile time checks to mem_init()

Some of the inconsistencies checked for at run time can be detected at
build time already, so duplicate the checks done at run time to also be
done at build time.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_32.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c483f4242079..d3a45d54547a 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -1040,11 +1040,25 @@ void __init mem_init(void)
 		(unsigned long)&_text, (unsigned long)&_etext,
 		((unsigned long)&_etext - (unsigned long)&_text) >> 10);
 
+	/*
+	 * Check boundaries twice: Some fundamental inconsistencies can
+	 * be detected at build time already.
+	 */
+#define __FIXADDR_TOP (-PAGE_SIZE)
+#ifdef CONFIG_HIGHMEM
+	BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE	> FIXADDR_START);
+	BUILD_BUG_ON(VMALLOC_END			> PKMAP_BASE);
+#endif
+#define high_memory (-128UL << 20)
+	BUILD_BUG_ON(VMALLOC_START			>= VMALLOC_END);
+#undef high_memory
+#undef __FIXADDR_TOP
+
 #ifdef CONFIG_HIGHMEM
 	BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE	> FIXADDR_START);
 	BUG_ON(VMALLOC_END				> PKMAP_BASE);
 #endif
-	BUG_ON(VMALLOC_START				> VMALLOC_END);
+	BUG_ON(VMALLOC_START				>= VMALLOC_END);
 	BUG_ON((unsigned long)high_memory		> VMALLOC_START);
 
 	if (boot_cpu_data.wp_works_ok < 0)
-- 
cgit v1.2.3-70-g09d2


From cfc319833b5b359bf3bce99564dbac00af7925ac Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Tue, 16 Dec 2008 11:46:58 +0000
Subject: x86, 32-bit: improve lazy TLB handling code

Impact: micro-optimize the 32-bit TLB flush code

Use the faster x86_{read,write}_percpu() accessors here.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mmu_context_32.h | 13 ++++++-------
 arch/x86/kernel/tlb_32.c              | 11 +++++------
 2 files changed, 11 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h
index 8e10015781fb..7e98ce1d2c0e 100644
--- a/arch/x86/include/asm/mmu_context_32.h
+++ b/arch/x86/include/asm/mmu_context_32.h
@@ -4,9 +4,8 @@
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
 #ifdef CONFIG_SMP
-	unsigned cpu = smp_processor_id();
-	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
-		per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_LAZY;
+	if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK)
+		x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY);
 #endif
 }
 
@@ -20,8 +19,8 @@ static inline void switch_mm(struct mm_struct *prev,
 		/* stop flush ipis for the previous mm */
 		cpu_clear(cpu, prev->cpu_vm_mask);
 #ifdef CONFIG_SMP
-		per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK;
-		per_cpu(cpu_tlbstate, cpu).active_mm = next;
+		x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
+		x86_write_percpu(cpu_tlbstate.active_mm, next);
 #endif
 		cpu_set(cpu, next->cpu_vm_mask);
 
@@ -36,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev,
 	}
 #ifdef CONFIG_SMP
 	else {
-		per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK;
-		BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next);
+		x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
+		BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next);
 
 		if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
 			/* We were in lazy tlb mode and leave_mm disabled
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index f4049f3513b6..4290d918b58a 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -34,9 +34,8 @@ static DEFINE_SPINLOCK(tlbstate_lock);
  */
 void leave_mm(int cpu)
 {
-	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
-		BUG();
-	cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
+	BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK);
+	cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask);
 	load_cr3(swapper_pg_dir);
 }
 EXPORT_SYMBOL_GPL(leave_mm);
@@ -104,8 +103,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
 		 * BUG();
 		 */
 
-	if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
-		if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
+	if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) {
+		if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) {
 			if (flush_va == TLB_FLUSH_ALL)
 				local_flush_tlb();
 			else
@@ -238,7 +237,7 @@ static void do_flush_tlb_all(void *info)
 	unsigned long cpu = smp_processor_id();
 
 	__flush_tlb_all();
-	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
+	if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY)
 		leave_mm(cpu);
 }
 
-- 
cgit v1.2.3-70-g09d2


From b6fd6f26733e864fba2ea3eb1d716e23d2e66f3a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 16 Dec 2008 19:23:36 +0100
Subject: x86, mm: limit MAXMEM on 64-bit

on 64-bit x86 the physical memory limit is controlled by the sparsemem
bits - which are 44 bits right now. But MAXMEM (the max pfn number
e820 parsing will allow to enter our sizing routines) is set to
0x00003fffffffffff, i.e. 46 bits - that's too large because it overlaps
into the vmalloc range.

So couple MAXMEM to MAX_PHYSMEM_BITS, and add a comment that the
maximum of MAX_PHYSMEM_BITS is 45 bits.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/pgtable_64.h | 2 +-
 arch/x86/include/asm/sparsemem.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 65b6be6677c7..c54ba69608bd 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -146,7 +146,7 @@ static inline void native_pgd_clear(pgd_t *pgd)
 #define PGDIR_MASK	(~(PGDIR_SIZE - 1))
 
 
-#define MAXMEM		 _AC(0x00003fffffffffff, UL)
+#define MAXMEM		 _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
 #define VMALLOC_START    _AC(0xffffc20000000000, UL)
 #define VMALLOC_END      _AC(0xffffe1ffffffffff, UL)
 #define VMEMMAP_START	 _AC(0xffffe20000000000, UL)
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index be44f7dab395..e3cc3c063ec5 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -27,7 +27,7 @@
 #else /* CONFIG_X86_32 */
 # define SECTION_SIZE_BITS	27 /* matt - 128 is convenient right now */
 # define MAX_PHYSADDR_BITS	44
-# define MAX_PHYSMEM_BITS	44
+# define MAX_PHYSMEM_BITS	44 /* Can be max 45 bits */
 #endif
 
 #endif /* CONFIG_SPARSEMEM */
-- 
cgit v1.2.3-70-g09d2


From 3c763fd77e66e55d029052da31df0abd9920cb1e Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Tue, 16 Dec 2008 19:07:47 +0100
Subject: x86: microcode_amd: fix wrong handling of equivalent CPU id

Impact: fix bug resulting in non-loaded AMD microcode

mc_header->processor_rev_id is a 2 byte value. Similar is true for
equiv_cpu in an equiv_cpu_entry -- only 2 bytes are of interest.

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 5f8e5d75a254..b5bc81470bcf 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -62,7 +62,7 @@ struct microcode_header_amd {
 	unsigned int  mc_patch_data_checksum;
 	unsigned int  nb_dev_id;
 	unsigned int  sb_dev_id;
-	unsigned char processor_rev_id[2];
+	u16 processor_rev_id;
 	unsigned char nb_rev_id;
 	unsigned char sb_rev_id;
 	unsigned char bios_api_rev;
@@ -125,7 +125,7 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
 
 	while (equiv_cpu_table[i].installed_cpu != 0) {
 		if (current_cpu_id == equiv_cpu_table[i].installed_cpu) {
-			equiv_cpu_id = equiv_cpu_table[i].equiv_cpu;
+			equiv_cpu_id = equiv_cpu_table[i].equiv_cpu & 0xffff;
 			break;
 		}
 		i++;
@@ -137,21 +137,10 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
 		return 0;
 	}
 
-	if ((mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff)) {
-		printk(KERN_ERR
-			"microcode: CPU%d patch does not match "
-			"(patch is %x, cpu extended is %x) \n",
-			cpu, mc_header->processor_rev_id[0],
-			(equiv_cpu_id & 0xff));
-		return 0;
-	}
-
-	if ((mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff)) {
-		printk(KERN_ERR "microcode: CPU%d patch does not match "
-			"(patch is %x, cpu base id is %x) \n",
-			cpu, mc_header->processor_rev_id[1],
-			((equiv_cpu_id >> 16) & 0xff));
-
+	if (mc_header->processor_rev_id != equiv_cpu_id) {
+		printk(KERN_ERR	"microcode: CPU%d patch does not match "
+		       "(processor_rev_id: %x, eqiv_cpu_id: %x)\n",
+		       cpu, mc_header->processor_rev_id, equiv_cpu_id);
 		return 0;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 2a3282a77b02fb47576ffbdb4867c8c6eeb83ed5 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Tue, 16 Dec 2008 19:08:53 +0100
Subject: x86: microcode_amd: fix typos and trailing whitespaces in log
 messages

Impact: fix printk typos

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index b5bc81470bcf..83a9fa321d9b 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -10,7 +10,7 @@
  *  This driver allows to upgrade microcode on AMD
  *  family 0x10 and 0x11 processors.
  *
- *  Licensed unter the terms of the GNU General Public
+ *  Licensed under the terms of the GNU General Public
  *  License version 2. See file COPYING for details.
 */
 
@@ -133,7 +133,7 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
 
 	if (!equiv_cpu_id) {
 		printk(KERN_ERR "microcode: CPU%d cpu_id "
-		       "not found in equivalent cpu table \n", cpu);
+		       "not found in equivalent cpu table\n", cpu);
 		return 0;
 	}
 
@@ -151,7 +151,7 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
 					    NULL);
 		if ((!nb_pci_dev) ||
 		    (mc_header->nb_rev_id != nb_pci_dev->revision)) {
-			printk(KERN_ERR "microcode: CPU%d NB mismatch \n", cpu);
+			printk(KERN_ERR "microcode: CPU%d NB mismatch\n", cpu);
 			pci_dev_put(nb_pci_dev);
 			return 0;
 		}
@@ -165,7 +165,7 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
 					    NULL);
 		if ((!sb_pci_dev) ||
 		    (mc_header->sb_rev_id != sb_pci_dev->revision)) {
-			printk(KERN_ERR "microcode: CPU%d SB mismatch \n", cpu);
+			printk(KERN_ERR "microcode: CPU%d SB mismatch\n", cpu);
 			pci_dev_put(sb_pci_dev);
 			return 0;
 		}
@@ -219,7 +219,7 @@ static void apply_microcode_amd(int cpu)
 	}
 
 	printk(KERN_INFO "microcode: CPU%d updated from revision "
-	       "0x%x to 0x%x \n",
+	       "0x%x to 0x%x\n",
 	       cpu_num, uci->cpu_sig.rev, mc_amd->hdr.patch_id);
 
 	uci->cpu_sig.rev = rev;
@@ -282,7 +282,7 @@ static int install_equiv_cpu_table(u8 *buf,
 
 	if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
 		printk(KERN_ERR "microcode: error! "
-		       "Wrong microcode equivalnet cpu table\n");
+		       "Wrong microcode equivalent cpu table\n");
 		return 0;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From be957763b01905d33b53cdd25c8df110f94f499a Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Tue, 16 Dec 2008 19:11:23 +0100
Subject: x86: microcode_amd: fix checkpatch warnings/errors

Impact: cleanup

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 83a9fa321d9b..a8a0ec600554 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -32,9 +32,9 @@
 #include <linux/platform_device.h>
 #include <linux/pci.h>
 #include <linux/pci_ids.h>
+#include <linux/uaccess.h>
 
 #include <asm/msr.h>
-#include <asm/uaccess.h>
 #include <asm/processor.h>
 #include <asm/microcode.h>
 
@@ -225,7 +225,7 @@ static void apply_microcode_amd(int cpu)
 	uci->cpu_sig.rev = rev;
 }
 
-static void * get_next_ucode(u8 *buf, unsigned int size,
+static void *get_next_ucode(u8 *buf, unsigned int size,
 			int (*get_ucode_data)(void *, const void *, size_t),
 			unsigned int *mc_size)
 {
@@ -256,7 +256,8 @@ static void * get_next_ucode(u8 *buf, unsigned int size,
 	mc = vmalloc(UCODE_MAX_SIZE);
 	if (mc) {
 		memset(mc, 0, UCODE_MAX_SIZE);
-		if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) {
+		if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR,
+				   total_size)) {
 			vfree(mc);
 			mc = NULL;
 		} else
@@ -332,7 +333,8 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
 		unsigned int uninitialized_var(mc_size);
 		struct microcode_header_amd *mc_header;
 
-		mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data, &mc_size);
+		mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data,
+				    &mc_size);
 		if (!mc)
 			break;
 
@@ -342,7 +344,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
 				vfree(new_mc);
 			new_rev = mc_header->patch_id;
 			new_mc  = mc;
-		} else 
+		} else
 			vfree(mc);
 
 		ucode_ptr += mc_size;
@@ -354,9 +356,9 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
 			if (uci->mc)
 				vfree(uci->mc);
 			uci->mc = new_mc;
-			pr_debug("microcode: CPU%d found a matching microcode update with"
-				" version 0x%x (current=0x%x)\n",
-				cpu, new_rev, uci->cpu_sig.rev);
+			pr_debug("microcode: CPU%d found a matching microcode "
+				 "update with version 0x%x (current=0x%x)\n",
+				 cpu, new_rev, uci->cpu_sig.rev);
 		} else
 			vfree(new_mc);
 	}
@@ -383,7 +385,8 @@ static int request_microcode_fw(int cpu, struct device *device)
 
 	ret = request_firmware(&firmware, fw_name, device);
 	if (ret) {
-		printk(KERN_ERR "microcode: ucode data file %s load failed\n", fw_name);
+		printk(KERN_ERR "microcode: ucode data file %s load failed\n",
+		       fw_name);
 		return ret;
 	}
 
@@ -397,8 +400,8 @@ static int request_microcode_fw(int cpu, struct device *device)
 
 static int request_microcode_user(int cpu, const void __user *buf, size_t size)
 {
-	printk(KERN_WARNING "microcode: AMD microcode update via /dev/cpu/microcode"
-			"is not supported\n");
+	printk(KERN_WARNING "microcode: AMD microcode update via "
+	       "/dev/cpu/microcode is not supported\n");
 	return -1;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 8c135206c826095c852c16d94a0a74eeaf05c90d Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Tue, 16 Dec 2008 19:13:00 +0100
Subject: x86: microcode_amd: fix compile warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Impact: fix build warning

  CC      arch/x86/kernel/microcode_amd.o
arch/x86/kernel/microcode_amd.c: In function ‘request_microcode_fw’:
arch/x86/kernel/microcode_amd.c:393: warning: passing argument 2 of ‘generic_load_microcode’ discards qualifiers from pointer target type

(Respect "const" qualifier of firmware->data.)

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index a8a0ec600554..89b386c901fd 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -225,8 +225,8 @@ static void apply_microcode_amd(int cpu)
 	uci->cpu_sig.rev = rev;
 }
 
-static void *get_next_ucode(u8 *buf, unsigned int size,
-			int (*get_ucode_data)(void *, const void *, size_t),
+static void *get_next_ucode(const u8 *buf, unsigned int size,
+			int (*get_ucode_data)(void *, const u8 *, size_t),
 			unsigned int *mc_size)
 {
 	unsigned int total_size;
@@ -268,8 +268,8 @@ static void *get_next_ucode(u8 *buf, unsigned int size,
 }
 
 
-static int install_equiv_cpu_table(u8 *buf,
-		int (*get_ucode_data)(void *, const void *, size_t))
+static int install_equiv_cpu_table(const u8 *buf,
+		int (*get_ucode_data)(void *, const u8 *, size_t))
 {
 #define UCODE_CONTAINER_HEADER_SIZE	12
 	u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE];
@@ -311,11 +311,13 @@ static void free_equiv_cpu_table(void)
 	}
 }
 
-static int generic_load_microcode(int cpu, void *data, size_t size,
-		int (*get_ucode_data)(void *, const void *, size_t))
+static int generic_load_microcode(int cpu, const u8 *data, size_t size,
+		int (*get_ucode_data)(void *, const u8 *, size_t))
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	u8 *ucode_ptr = data, *new_mc = NULL, *mc;
+	const u8 *ucode_ptr = data;
+	void *new_mc = NULL;
+	void *mc;
 	int new_rev = uci->cpu_sig.rev;
 	unsigned int leftover;
 	unsigned long offset;
@@ -368,7 +370,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
 	return (int)leftover;
 }
 
-static int get_ucode_fw(void *to, const void *from, size_t n)
+static int get_ucode_fw(void *to, const u8 *from, size_t n)
 {
 	memcpy(to, from, n);
 	return 0;
@@ -390,7 +392,7 @@ static int request_microcode_fw(int cpu, struct device *device)
 		return ret;
 	}
 
-	ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size,
+	ret = generic_load_microcode(cpu, firmware->data, firmware->size,
 			&get_ucode_fw);
 
 	release_firmware(firmware);
-- 
cgit v1.2.3-70-g09d2


From 0657d9ebff186dcdb17e582dcb909028775a7707 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Tue, 16 Dec 2008 19:14:05 +0100
Subject: x86: microcode_amd: don't pass superfluous function pointer for
 get_ucode_data

Impact: cleanup

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 89b386c901fd..c7f225c7e481 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -225,9 +225,14 @@ static void apply_microcode_amd(int cpu)
 	uci->cpu_sig.rev = rev;
 }
 
+static int get_ucode_data(void *to, const u8 *from, size_t n)
+{
+	memcpy(to, from, n);
+	return 0;
+}
+
 static void *get_next_ucode(const u8 *buf, unsigned int size,
-			int (*get_ucode_data)(void *, const u8 *, size_t),
-			unsigned int *mc_size)
+			    unsigned int *mc_size)
 {
 	unsigned int total_size;
 #define UCODE_CONTAINER_SECTION_HDR	8
@@ -268,8 +273,7 @@ static void *get_next_ucode(const u8 *buf, unsigned int size,
 }
 
 
-static int install_equiv_cpu_table(const u8 *buf,
-		int (*get_ucode_data)(void *, const u8 *, size_t))
+static int install_equiv_cpu_table(const u8 *buf)
 {
 #define UCODE_CONTAINER_HEADER_SIZE	12
 	u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE];
@@ -311,8 +315,7 @@ static void free_equiv_cpu_table(void)
 	}
 }
 
-static int generic_load_microcode(int cpu, const u8 *data, size_t size,
-		int (*get_ucode_data)(void *, const u8 *, size_t))
+static int generic_load_microcode(int cpu, const u8 *data, size_t size)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	const u8 *ucode_ptr = data;
@@ -322,7 +325,7 @@ static int generic_load_microcode(int cpu, const u8 *data, size_t size,
 	unsigned int leftover;
 	unsigned long offset;
 
-	offset = install_equiv_cpu_table(ucode_ptr, get_ucode_data);
+	offset = install_equiv_cpu_table(ucode_ptr);
 	if (!offset) {
 		printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
 		return -EINVAL;
@@ -335,8 +338,7 @@ static int generic_load_microcode(int cpu, const u8 *data, size_t size,
 		unsigned int uninitialized_var(mc_size);
 		struct microcode_header_amd *mc_header;
 
-		mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data,
-				    &mc_size);
+		mc = get_next_ucode(ucode_ptr, leftover, &mc_size);
 		if (!mc)
 			break;
 
@@ -370,12 +372,6 @@ static int generic_load_microcode(int cpu, const u8 *data, size_t size,
 	return (int)leftover;
 }
 
-static int get_ucode_fw(void *to, const u8 *from, size_t n)
-{
-	memcpy(to, from, n);
-	return 0;
-}
-
 static int request_microcode_fw(int cpu, struct device *device)
 {
 	const char *fw_name = "amd-ucode/microcode_amd.bin";
@@ -392,8 +388,7 @@ static int request_microcode_fw(int cpu, struct device *device)
 		return ret;
 	}
 
-	ret = generic_load_microcode(cpu, firmware->data, firmware->size,
-			&get_ucode_fw);
+	ret = generic_load_microcode(cpu, firmware->data, firmware->size);
 
 	release_firmware(firmware);
 
-- 
cgit v1.2.3-70-g09d2


From 29d0887ffd084cde9d6a1286cb82b71701a974dd Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Tue, 16 Dec 2008 19:16:34 +0100
Subject: x86: microcode_amd: replace inline asm by common rdmsr/wrmsr
 functions

Impact: cleanup

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/msr-index.h |  2 ++
 arch/x86/kernel/microcode_amd.c  | 23 +++++------------------
 2 files changed, 7 insertions(+), 18 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e38859d577a1..cb58643947b9 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -85,7 +85,9 @@
 /* AMD64 MSRs. Not complete. See the architecture manual for a more
    complete list. */
 
+#define MSR_AMD64_PATCH_LEVEL		0x0000008b
 #define MSR_AMD64_NB_CFG		0xc001001f
+#define MSR_AMD64_PATCH_LOADER		0xc0010020
 #define MSR_AMD64_IBSFETCHCTL		0xc0011030
 #define MSR_AMD64_IBSFETCHLINAD		0xc0011031
 #define MSR_AMD64_IBSFETCHPHYSAD	0xc0011032
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index c7f225c7e481..2856955ddab1 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -93,6 +93,7 @@ static struct equiv_cpu_entry *equiv_cpu_table;
 static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	u32 dummy;
 
 	memset(csig, 0, sizeof(*csig));
 
@@ -102,9 +103,7 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 		return -1;
 	}
 
-	asm volatile("movl %1, %%ecx; rdmsr"
-		     : "=a" (csig->rev)
-		     : "i" (0x0000008B) : "ecx");
+	rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy);
 
 	printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n",
 		csig->rev);
@@ -181,12 +180,10 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
 static void apply_microcode_amd(int cpu)
 {
 	unsigned long flags;
-	unsigned int eax, edx;
-	unsigned int rev;
+	u32 rev, dummy;
 	int cpu_num = raw_smp_processor_id();
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
 	struct microcode_amd *mc_amd = uci->mc;
-	unsigned long addr;
 
 	/* We should bind the task to the CPU */
 	BUG_ON(cpu_num != cpu);
@@ -195,19 +192,9 @@ static void apply_microcode_amd(int cpu)
 		return;
 
 	spin_lock_irqsave(&microcode_update_lock, flags);
-
-	addr = (unsigned long)&mc_amd->hdr.data_code;
-	edx = (unsigned int)(((unsigned long)upper_32_bits(addr)));
-	eax = (unsigned int)(((unsigned long)lower_32_bits(addr)));
-
-	asm volatile("movl %0, %%ecx; wrmsr" :
-		     : "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx");
-
+	wrmsrl(MSR_AMD64_PATCH_LOADER, &mc_amd->hdr.data_code);
 	/* get patch id after patching */
-	asm volatile("movl %1, %%ecx; rdmsr"
-		     : "=a" (rev)
-		     : "i" (0x0000008B) : "ecx");
-
+	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
 	spin_unlock_irqrestore(&microcode_update_lock, flags);
 
 	/* check current patch id and patch's id for match */
-- 
cgit v1.2.3-70-g09d2


From 6cc9b6d94b6fee23b0671970f67d297fa76b68b3 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Tue, 16 Dec 2008 19:17:45 +0100
Subject: x86: microcode_amd: consolidate macro definitions

Impact: cleanup

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 2856955ddab1..e68e723490a3 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -75,15 +75,9 @@ struct microcode_amd {
 	unsigned int mpb[0];
 };
 
-#define UCODE_MAX_SIZE          (2048)
-#define DEFAULT_UCODE_DATASIZE	(896)
-#define MC_HEADER_SIZE		(sizeof(struct microcode_header_amd))
-#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
-#define DWSIZE			(sizeof(u32))
-/* For now we support a fixed ucode total size only */
-#define get_totalsize(mc) \
-	((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \
-	 + MC_HEADER_SIZE)
+#define UCODE_MAX_SIZE			2048
+#define UCODE_CONTAINER_SECTION_HDR	8
+#define UCODE_CONTAINER_HEADER_SIZE	12
 
 /* serialize access to the physical write */
 static DEFINE_SPINLOCK(microcode_update_lock);
@@ -222,7 +216,6 @@ static void *get_next_ucode(const u8 *buf, unsigned int size,
 			    unsigned int *mc_size)
 {
 	unsigned int total_size;
-#define UCODE_CONTAINER_SECTION_HDR	8
 	u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
 	void *mc;
 
@@ -255,14 +248,12 @@ static void *get_next_ucode(const u8 *buf, unsigned int size,
 		} else
 			*mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
 	}
-#undef UCODE_CONTAINER_SECTION_HDR
 	return mc;
 }
 
 
 static int install_equiv_cpu_table(const u8 *buf)
 {
-#define UCODE_CONTAINER_HEADER_SIZE	12
 	u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE];
 	unsigned int *buf_pos = (unsigned int *)container_hdr;
 	unsigned long size;
@@ -291,7 +282,6 @@ static int install_equiv_cpu_table(const u8 *buf)
 	}
 
 	return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
-#undef UCODE_CONTAINER_HEADER_SIZE
 }
 
 static void free_equiv_cpu_table(void)
-- 
cgit v1.2.3-70-g09d2


From 98415301ea2dd389539ab429bcfa9da07219eabc Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Tue, 16 Dec 2008 19:20:21 +0100
Subject: x86: microcode_amd: remove (wrong) chipset deivce ID checks

Impact: remove dead/incorrect code

Currently there is no chipset specific ucode. The checks are incorrect
anyway (e.g. pci device IDs are 16 bit and not 8 bit).

Thus I remove the stuff for the time being and will reintroduce it if
it's foreseeable that it is really needed.

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 32 +++++---------------------------
 1 file changed, 5 insertions(+), 27 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index e68e723490a3..2e8af6ef3da9 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -108,7 +108,6 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 static int get_matching_microcode(int cpu, void *mc, int rev)
 {
 	struct microcode_header_amd *mc_header = mc;
-	struct pci_dev *nb_pci_dev, *sb_pci_dev;
 	unsigned int current_cpu_id;
 	unsigned int equiv_cpu_id = 0x00;
 	unsigned int i = 0;
@@ -137,32 +136,11 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
 		return 0;
 	}
 
-	/* ucode may be northbridge specific */
-	if (mc_header->nb_dev_id) {
-		nb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD,
-					    (mc_header->nb_dev_id & 0xff),
-					    NULL);
-		if ((!nb_pci_dev) ||
-		    (mc_header->nb_rev_id != nb_pci_dev->revision)) {
-			printk(KERN_ERR "microcode: CPU%d NB mismatch\n", cpu);
-			pci_dev_put(nb_pci_dev);
-			return 0;
-		}
-		pci_dev_put(nb_pci_dev);
-	}
-
-	/* ucode may be southbridge specific */
-	if (mc_header->sb_dev_id) {
-		sb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD,
-					    (mc_header->sb_dev_id & 0xff),
-					    NULL);
-		if ((!sb_pci_dev) ||
-		    (mc_header->sb_rev_id != sb_pci_dev->revision)) {
-			printk(KERN_ERR "microcode: CPU%d SB mismatch\n", cpu);
-			pci_dev_put(sb_pci_dev);
-			return 0;
-		}
-		pci_dev_put(sb_pci_dev);
+	/* ucode might be chipset specific -- currently we don't support this */
+	if (mc_header->nb_dev_id || mc_header->sb_dev_id) {
+		printk(KERN_WARNING "microcode: CPU%d loading of chipset "
+		       "specific code not yet supported\n", cpu);
+		return 0;
 	}
 
 	if (mc_header->patch_id <= rev)
-- 
cgit v1.2.3-70-g09d2


From 5549b94bc74c3e7edd44e0aeb7d9f773e82d2d20 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Tue, 16 Dec 2008 19:21:30 +0100
Subject: x86: microcode_amd: use 'packed' attribute for structs

Impact: cleanup

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 45 +++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 22 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 2e8af6ef3da9..e1ce650f276b 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -47,28 +47,29 @@ MODULE_LICENSE("GPL v2");
 #define UCODE_UCODE_TYPE           0x00000001
 
 struct equiv_cpu_entry {
-	unsigned int installed_cpu;
-	unsigned int fixed_errata_mask;
-	unsigned int fixed_errata_compare;
-	unsigned int equiv_cpu;
-};
+	u32	installed_cpu;
+	u32	fixed_errata_mask;
+	u32	fixed_errata_compare;
+	u16	equiv_cpu;
+	u16	res;
+} __attribute__((packed));
 
 struct microcode_header_amd {
-	unsigned int  data_code;
-	unsigned int  patch_id;
-	unsigned char mc_patch_data_id[2];
-	unsigned char mc_patch_data_len;
-	unsigned char init_flag;
-	unsigned int  mc_patch_data_checksum;
-	unsigned int  nb_dev_id;
-	unsigned int  sb_dev_id;
-	u16 processor_rev_id;
-	unsigned char nb_rev_id;
-	unsigned char sb_rev_id;
-	unsigned char bios_api_rev;
-	unsigned char reserved1[3];
-	unsigned int  match_reg[8];
-};
+	u32	data_code;
+	u32	patch_id;
+	u16	mc_patch_data_id;
+	u8	mc_patch_data_len;
+	u8	init_flag;
+	u32	mc_patch_data_checksum;
+	u32	nb_dev_id;
+	u32	sb_dev_id;
+	u16	processor_rev_id;
+	u8	nb_rev_id;
+	u8	sb_rev_id;
+	u8	bios_api_rev;
+	u8	reserved1[3];
+	u32	match_reg[8];
+} __attribute__((packed));
 
 struct microcode_amd {
 	struct microcode_header_amd hdr;
@@ -109,7 +110,7 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
 {
 	struct microcode_header_amd *mc_header = mc;
 	unsigned int current_cpu_id;
-	unsigned int equiv_cpu_id = 0x00;
+	u16 equiv_cpu_id = 0;
 	unsigned int i = 0;
 
 	BUG_ON(equiv_cpu_table == NULL);
@@ -117,7 +118,7 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
 
 	while (equiv_cpu_table[i].installed_cpu != 0) {
 		if (current_cpu_id == equiv_cpu_table[i].installed_cpu) {
-			equiv_cpu_id = equiv_cpu_table[i].equiv_cpu & 0xffff;
+			equiv_cpu_id = equiv_cpu_table[i].equiv_cpu;
 			break;
 		}
 		i++;
-- 
cgit v1.2.3-70-g09d2


From df23cab563912ba43f7e9bc8ac517e5a2ddc9cd2 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Tue, 16 Dec 2008 19:22:36 +0100
Subject: x86: microcode_amd: modify log messages

Impact: change microcode printk content

Change log level and provide (at least I tried to;-) consistent, short,
meaningful content.

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 58 ++++++++++++++++++-----------------------
 1 file changed, 26 insertions(+), 32 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index e1ce650f276b..24c256f4e50a 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -91,18 +91,13 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 	u32 dummy;
 
 	memset(csig, 0, sizeof(*csig));
-
 	if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
-		printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n",
-		       cpu);
+		printk(KERN_WARNING "microcode: CPU%d: AMD CPU family 0x%x not "
+		       "supported\n", cpu, c->x86);
 		return -1;
 	}
-
 	rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy);
-
-	printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n",
-		csig->rev);
-
+	printk(KERN_INFO "microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev);
 	return 0;
 }
 
@@ -125,21 +120,21 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
 	}
 
 	if (!equiv_cpu_id) {
-		printk(KERN_ERR "microcode: CPU%d cpu_id "
-		       "not found in equivalent cpu table\n", cpu);
+		printk(KERN_WARNING "microcode: CPU%d: cpu revision "
+		       "not listed in equivalent cpu table\n", cpu);
 		return 0;
 	}
 
 	if (mc_header->processor_rev_id != equiv_cpu_id) {
-		printk(KERN_ERR	"microcode: CPU%d patch does not match "
-		       "(processor_rev_id: %x, eqiv_cpu_id: %x)\n",
+		printk(KERN_ERR	"microcode: CPU%d: patch mismatch "
+		       "(processor_rev_id: %x, equiv_cpu_id: %x)\n",
 		       cpu, mc_header->processor_rev_id, equiv_cpu_id);
 		return 0;
 	}
 
 	/* ucode might be chipset specific -- currently we don't support this */
 	if (mc_header->nb_dev_id || mc_header->sb_dev_id) {
-		printk(KERN_WARNING "microcode: CPU%d loading of chipset "
+		printk(KERN_ERR "microcode: CPU%d: loading of chipset "
 		       "specific code not yet supported\n", cpu);
 		return 0;
 	}
@@ -172,15 +167,13 @@ static void apply_microcode_amd(int cpu)
 
 	/* check current patch id and patch's id for match */
 	if (rev != mc_amd->hdr.patch_id) {
-		printk(KERN_ERR "microcode: CPU%d update from revision "
-		       "0x%x to 0x%x failed\n", cpu_num,
-		       mc_amd->hdr.patch_id, rev);
+		printk(KERN_ERR "microcode: CPU%d: update failed "
+		       "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id);
 		return;
 	}
 
-	printk(KERN_INFO "microcode: CPU%d updated from revision "
-	       "0x%x to 0x%x\n",
-	       cpu_num, uci->cpu_sig.rev, mc_amd->hdr.patch_id);
+	printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n",
+	       cpu, rev);
 
 	uci->cpu_sig.rev = rev;
 }
@@ -202,18 +195,18 @@ static void *get_next_ucode(const u8 *buf, unsigned int size,
 		return NULL;
 
 	if (section_hdr[0] != UCODE_UCODE_TYPE) {
-		printk(KERN_ERR "microcode: error! "
-		       "Wrong microcode payload type field\n");
+		printk(KERN_ERR "microcode: error: invalid type field in "
+		       "container file section header\n");
 		return NULL;
 	}
 
 	total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8));
 
-	printk(KERN_INFO "microcode: size %u, total_size %u\n",
-		size, total_size);
+	printk(KERN_DEBUG "microcode: size %u, total_size %u\n",
+	       size, total_size);
 
 	if (total_size > size || total_size > UCODE_MAX_SIZE) {
-		printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
+		printk(KERN_ERR "microcode: error: size mismatch\n");
 		return NULL;
 	}
 
@@ -243,14 +236,15 @@ static int install_equiv_cpu_table(const u8 *buf)
 	size = buf_pos[2];
 
 	if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
-		printk(KERN_ERR "microcode: error! "
-		       "Wrong microcode equivalent cpu table\n");
+		printk(KERN_ERR "microcode: error: invalid type field in "
+		       "container file section header\n");
 		return 0;
 	}
 
 	equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size);
 	if (!equiv_cpu_table) {
-		printk(KERN_ERR "microcode: error, can't allocate memory for equiv CPU table\n");
+		printk(KERN_ERR "microcode: failed to allocate "
+		       "equivalent CPU table\n");
 		return 0;
 	}
 
@@ -283,7 +277,8 @@ static int generic_load_microcode(int cpu, const u8 *data, size_t size)
 
 	offset = install_equiv_cpu_table(ucode_ptr);
 	if (!offset) {
-		printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
+		printk(KERN_ERR "microcode: failed to create "
+		       "equivalent cpu table\n");
 		return -EINVAL;
 	}
 
@@ -339,8 +334,7 @@ static int request_microcode_fw(int cpu, struct device *device)
 
 	ret = request_firmware(&firmware, fw_name, device);
 	if (ret) {
-		printk(KERN_ERR "microcode: ucode data file %s load failed\n",
-		       fw_name);
+		printk(KERN_ERR "microcode: failed to load file %s\n", fw_name);
 		return ret;
 	}
 
@@ -353,8 +347,8 @@ static int request_microcode_fw(int cpu, struct device *device)
 
 static int request_microcode_user(int cpu, const void __user *buf, size_t size)
 {
-	printk(KERN_WARNING "microcode: AMD microcode update via "
-	       "/dev/cpu/microcode is not supported\n");
+	printk(KERN_INFO "microcode: AMD microcode update via "
+	       "/dev/cpu/microcode not supported\n");
 	return -1;
 }
 
-- 
cgit v1.2.3-70-g09d2


From d4377974062122d6d9be0bbd8a910a0954714194 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 16 Dec 2008 20:59:24 +0100
Subject: x86: support always running TSC on Intel CPUs, add cpufeature
 definition

Impact: add new synthetic-cpuid bit definition

add X86_FEATURE_NONSTOP_TSC to the cpufeature bits - this is in
preparation of Venki's always-running-TSC patch.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/cpufeature.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 5bce8ed02b44..ea408dcba513 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -92,6 +92,7 @@
 #define X86_FEATURE_AMDC1E	(3*32+21) /* AMD C1E detected */
 #define X86_FEATURE_XTOPOLOGY	(3*32+22) /* cpu topology enum extensions */
 #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC	(3*32+24) /* TSC does not stop in C states */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
-- 
cgit v1.2.3-70-g09d2


From 40fb17152c50a69dc304dd632131c2f41281ce44 Mon Sep 17 00:00:00 2001
From: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Mon, 17 Nov 2008 16:11:37 -0800
Subject: x86: support always running TSC on Intel CPUs

Impact: reward non-stop TSCs with good TSC-based clocksources, etc.

Add support for CPUID_0x80000007_Bit8 on Intel CPUs as well. This bit means
that the TSC is invariant with C/P/T states and always runs at constant
frequency.

With Intel CPUs, we have 3 classes
* CPUs where TSC runs at constant rate and does not stop n C-states
* CPUs where TSC runs at constant rate, but will stop in deep C-states
* CPUs where TSC rate will vary based on P/T-states and TSC will stop in deep
  C-states.

To cover these 3, one feature bit (CONSTANT_TSC) is not enough. So, add a
second bit (NONSTOP_TSC). CONSTANT_TSC indicates that the TSC runs at
constant frequency irrespective of P/T-states, and NONSTOP_TSC indicates
that TSC does not stop in deep C-states.

CPUID_0x8000000_Bit8 indicates both these feature bit can be set.
We still have CONSTANT_TSC _set_ and NONSTOP_TSC _not_set_ on some older Intel
CPUs, based on model checks. We can use TSC on such CPUs for time, as long as
those CPUs do not support/enter deep C-states.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/amd.c     |  9 +++++++--
 arch/x86/kernel/cpu/intel.c   | 10 ++++++++++
 arch/x86/kernel/process.c     |  2 +-
 drivers/acpi/processor_idle.c |  6 +++---
 4 files changed, 21 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 8f1e31db2ad5..7c878f6aa919 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -283,9 +283,14 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 {
 	early_init_amd_mc(c);
 
-	/* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
-	if (c->x86_power & (1<<8))
+	/*
+	 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
+	 * with P/T states and does not stop in deep C-states
+	 */
+	if (c->x86_power & (1 << 8)) {
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+	}
 
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_SYSCALL32);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cce0b6118d55..caec59437a22 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -41,6 +41,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 	if (c->x86 == 15 && c->x86_cache_alignment == 64)
 		c->x86_cache_alignment = 128;
 #endif
+
+	/*
+	 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
+	 * with P/T states and does not stop in deep C-states
+	 */
+	if (c->x86_power & (1 << 8)) {
+		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+	}
+
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c622772744d8..18c70fedba32 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -270,7 +270,7 @@ static void c1e_idle(void)
 		rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
 		if (lo & K8_INTP_C1E_ACTIVE_MASK) {
 			c1e_detected = 1;
-			if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+			if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
 				mark_tsc_unstable("TSC halt in AMD C1E");
 			printk(KERN_INFO "System has AMD C1E enabled\n");
 			set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E);
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 5f8d746a9b81..38aca048e951 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -374,15 +374,15 @@ static int tsc_halts_in_c(int state)
 {
 	switch (boot_cpu_data.x86_vendor) {
 	case X86_VENDOR_AMD:
+	case X86_VENDOR_INTEL:
 		/*
 		 * AMD Fam10h TSC will tick in all
 		 * C/P/S0/S1 states when this bit is set.
 		 */
-		if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+		if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
 			return 0;
+
 		/*FALL THROUGH*/
-	case X86_VENDOR_INTEL:
-		/* Several cases known where TSC halts in C2 too */
 	default:
 		return state > ACPI_STATE_C1;
 	}
-- 
cgit v1.2.3-70-g09d2


From f63c2f248959366cd11bfa476f866737047cf663 Mon Sep 17 00:00:00 2001
From: Tej <bewith.tej@gmail.com>
Date: Tue, 16 Dec 2008 11:56:06 -0800
Subject: xen: whitespace/checkpatch cleanup

Impact: cleanup

Signed-off-by: Tej <bewith.tej@gmail.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c  | 16 +++++++++-------
 arch/x86/xen/mmu.c        | 17 ++++++++++-------
 arch/x86/xen/multicalls.c |  2 +-
 arch/x86/xen/setup.c      |  9 +++++----
 4 files changed, 25 insertions(+), 19 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 5e4686d70f62..86cd2f829683 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -793,7 +793,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 
 	ret = 0;
 
-	switch(msr) {
+	switch (msr) {
 #ifdef CONFIG_X86_64
 		unsigned which;
 		u64 base;
@@ -1453,7 +1453,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 
 	ident_pte = 0;
 	pfn = 0;
-	for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
+	for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
 		pte_t *pte_page;
 
 		/* Reuse or allocate a page of ptes */
@@ -1471,7 +1471,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 		}
 
 		/* Install mappings */
-		for(pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
+		for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
 			pte_t pte;
 
 			if (pfn > max_pfn_mapped)
@@ -1485,7 +1485,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 		}
 	}
 
-	for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
+	for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
 		set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
 
 	set_page_prot(pmd, PAGE_KERNEL_RO);
@@ -1499,7 +1499,7 @@ static void convert_pfn_mfn(void *v)
 
 	/* All levels are converted the same way, so just treat them
 	   as ptes. */
-	for(i = 0; i < PTRS_PER_PTE; i++)
+	for (i = 0; i < PTRS_PER_PTE; i++)
 		pte[i] = xen_make_pte(pte[i].pte);
 }
 
@@ -1514,7 +1514,8 @@ static void convert_pfn_mfn(void *v)
  * of the physical mapping once some sort of allocator has been set
  * up.
  */
-static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
+static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+						unsigned long max_pfn)
 {
 	pud_t *l3;
 	pmd_t *l2;
@@ -1577,7 +1578,8 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pf
 #else	/* !CONFIG_X86_64 */
 static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
 
-static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
+static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+						unsigned long max_pfn)
 {
 	pmd_t *kernel_pmd;
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 636ef4caa52d..773d68d3e912 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -154,13 +154,13 @@ void xen_setup_mfn_list_list(void)
 {
 	unsigned pfn, idx;
 
-	for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
+	for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
 		unsigned topidx = p2m_top_index(pfn);
 
 		p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
 	}
 
-	for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
+	for (idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
 		unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
 		p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
 	}
@@ -179,7 +179,7 @@ void __init xen_build_dynamic_phys_to_machine(void)
 	unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
 	unsigned pfn;
 
-	for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
+	for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
 		unsigned topidx = p2m_top_index(pfn);
 
 		p2m_top[topidx] = &mfn_list[pfn];
@@ -207,7 +207,7 @@ static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
 	p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
 	BUG_ON(p == NULL);
 
-	for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
+	for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
 		p[i] = INVALID_P2M_ENTRY;
 
 	if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
@@ -407,7 +407,8 @@ out:
 		preempt_enable();
 }
 
-pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
+				 unsigned long addr, pte_t *ptep)
 {
 	/* Just return the pte as-is.  We preserve the bits on commit */
 	return *ptep;
@@ -878,7 +879,8 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 
 		if (user_pgd) {
 			xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
-			xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd)));
+			xen_do_pin(MMUEXT_PIN_L4_TABLE,
+				   PFN_DOWN(__pa(user_pgd)));
 		}
 	}
 #else /* CONFIG_X86_32 */
@@ -993,7 +995,8 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
 		pgd_t *user_pgd = xen_get_user_pgd(pgd);
 
 		if (user_pgd) {
-			xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd)));
+			xen_do_pin(MMUEXT_UNPIN_TABLE,
+				   PFN_DOWN(__pa(user_pgd)));
 			xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
 		}
 	}
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 8ea8a0d0b0de..c738644b5435 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -154,7 +154,7 @@ void xen_mc_flush(void)
 			       ret, smp_processor_id());
 			dump_stack();
 			for (i = 0; i < b->mcidx; i++) {
-				printk("  call %2d/%d: op=%lu arg=[%lx] result=%ld\n",
+				printk(KERN_DEBUG "  call %2d/%d: op=%lu arg=[%lx] result=%ld\n",
 				       i+1, b->mcidx,
 				       b->debug[i].op,
 				       b->debug[i].args[0],
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index d67901083888..15c6c68db6a2 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -28,6 +28,9 @@
 /* These are code, but not functions.  Defined in entry.S */
 extern const char xen_hypervisor_callback[];
 extern const char xen_failsafe_callback[];
+extern void xen_sysenter_target(void);
+extern void xen_syscall_target(void);
+extern void xen_syscall32_target(void);
 
 
 /**
@@ -110,7 +113,6 @@ static __cpuinit int register_callback(unsigned type, const void *func)
 
 void __cpuinit xen_enable_sysenter(void)
 {
-	extern void xen_sysenter_target(void);
 	int ret;
 	unsigned sysenter_feature;
 
@@ -132,8 +134,6 @@ void __cpuinit xen_enable_syscall(void)
 {
 #ifdef CONFIG_X86_64
 	int ret;
-	extern void xen_syscall_target(void);
-	extern void xen_syscall32_target(void);
 
 	ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
 	if (ret != 0) {
@@ -160,7 +160,8 @@ void __init xen_arch_setup(void)
 	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
 
 	if (!xen_feature(XENFEAT_auto_translated_physmap))
-		HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3);
+		HYPERVISOR_vm_assist(VMASST_CMD_enable,
+				     VMASST_TYPE_pae_extended_cr3);
 
 	if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
 	    register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
-- 
cgit v1.2.3-70-g09d2


From aab02f0ae20b8fe0fe891e9f107c6e392256ca01 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh <jaswinder@infradead.org>
Date: Mon, 15 Dec 2008 22:23:54 +0530
Subject: x86: process_64.c declare __switch_to() and sys_arch_prctl before
 they get used

Impact: cleanup

In asm/system.h moved out __switch_to from CONFIG_X86_32 as it is common for
both 32 and 64 bit.

In asm/pctl.h defined sys_arch_prctl
Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/prctl.h  | 3 +++
 arch/x86/include/asm/system.h | 4 ++--
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/prctl.h b/arch/x86/include/asm/prctl.h
index fe681147a4f7..a8894647dd9a 100644
--- a/arch/x86/include/asm/prctl.h
+++ b/arch/x86/include/asm/prctl.h
@@ -6,5 +6,8 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
+#ifdef CONFIG_X86_64
+extern long sys_arch_prctl(int, unsigned long);
+#endif /* CONFIG_X86_64 */
 
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 2ed3f0f44ff7..59555f48bf4c 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -17,12 +17,12 @@
 # define AT_VECTOR_SIZE_ARCH 1
 #endif
 
-#ifdef CONFIG_X86_32
-
 struct task_struct; /* one of the stranger aspects of C forward declarations */
 struct task_struct *__switch_to(struct task_struct *prev,
 				struct task_struct *next);
 
+#ifdef CONFIG_X86_32
+
 /*
  * Saving eflags is important. It switches not only IOPL between tasks,
  * it also protects other tasks from NT leaking through sysenter etc.
-- 
cgit v1.2.3-70-g09d2


From 7b5b50f1be9e07714cfaa620d102c8daf3cdd814 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh <jaswinder@infradead.org>
Date: Mon, 15 Dec 2008 22:24:48 +0530
Subject: x86: signal.c declare do_notify_resume before they get used

Impact: cleanup

In asm/signal.h moved out do_notify_resume from __i386__ as it is common
for both 32 and 64 bit.

Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

 arch/x86/include/asm/signal.h |    6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
---
 arch/x86/include/asm/signal.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 96ac44f275da..7761a5d554bb 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -121,6 +121,10 @@ typedef unsigned long sigset_t;
 
 #ifndef __ASSEMBLY__
 
+# ifdef __KERNEL__
+extern void do_notify_resume(struct pt_regs *, void *, __u32);
+# endif /* __KERNEL__ */
+
 #ifdef __i386__
 # ifdef __KERNEL__
 struct old_sigaction {
@@ -141,8 +145,6 @@ struct k_sigaction {
 	struct sigaction sa;
 };
 
-extern void do_notify_resume(struct pt_regs *, void *, __u32);
-
 # else /* __KERNEL__ */
 /* Here we must cater to libcs that poke about in kernel headers.  */
 
-- 
cgit v1.2.3-70-g09d2


From c0195b6da08c4ddd8c8ea830f6c3c40bc7f82071 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh <jaswinder@infradead.org>
Date: Mon, 15 Dec 2008 22:26:30 +0530
Subject: x86: ldt.c declare sys_modify_ldt before they get used

Impact: cleanup

In asm/syscalls.h moved out sys_modify_ldt from CONFIG_X86_32 as it is
common for both 32 and 64 bit.

Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/syscalls.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 87803da44010..75d4a6afc36f 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -19,6 +19,9 @@
 /* kernel/ioport.c */
 asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
 
+/* kernel/ldt.c */
+asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
+
 /* X86_32 only */
 #ifdef CONFIG_X86_32
 /* kernel/process_32.c */
@@ -38,9 +41,6 @@ asmlinkage int sys_rt_sigreturn(unsigned long);
 /* kernel/ioport.c */
 asmlinkage long sys_iopl(unsigned long);
 
-/* kernel/ldt.c */
-asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
-
 /* kernel/sys_i386_32.c */
 asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
 			  unsigned long, unsigned long, unsigned long);
-- 
cgit v1.2.3-70-g09d2


From a9b43c7d9890066709609df849959009645c1a19 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh <jaswinder@infradead.org>
Date: Mon, 15 Dec 2008 23:11:10 +0530
Subject: x86: setup.c find_and_reserve_crashkernel should be static

Impact: cleanup, reduce kernel size a bit

Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 9d5674f7b6cc..81f5d22747ae 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -448,6 +448,7 @@ static void __init reserve_early_setup_data(void)
  * @size: Size of the crashkernel memory to reserve.
  * Returns the base address on success, and -1ULL on failure.
  */
+static
 unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
 {
 	const unsigned long long alignment = 16<<20; 	/* 16M */
-- 
cgit v1.2.3-70-g09d2


From ecbf29cdb3990c83d90d0c4187c89fb2ce423367 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 16 Dec 2008 12:37:07 -0800
Subject: xen: clean up asm/xen/hypervisor.h

Impact: cleanup

hypervisor.h had accumulated a lot of crud, including lots of spurious
#includes.  Clean it all up, and go around fixing up everything else
accordingly.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/xen/hypercall.h  |  6 ++++++
 arch/x86/include/asm/xen/hypervisor.h | 39 +++++++----------------------------
 arch/x86/include/asm/xen/page.h       |  5 +++++
 arch/x86/xen/enlighten.c              |  1 +
 drivers/xen/balloon.c                 |  4 +++-
 drivers/xen/features.c                |  6 +++++-
 drivers/xen/grant-table.c             |  1 +
 include/xen/interface/event_channel.h |  2 ++
 8 files changed, 31 insertions(+), 33 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 3f6000d95fe2..5e79ca694326 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -33,8 +33,14 @@
 #ifndef _ASM_X86_XEN_HYPERCALL_H
 #define _ASM_X86_XEN_HYPERCALL_H
 
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
 #include <linux/errno.h>
 #include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
 
 #include <xen/interface/xen.h>
 #include <xen/interface/sched.h>
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index a38d25ac87d2..81fbd735aec4 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -33,39 +33,10 @@
 #ifndef _ASM_X86_XEN_HYPERVISOR_H
 #define _ASM_X86_XEN_HYPERVISOR_H
 
-#include <linux/types.h>
-#include <linux/kernel.h>
-
-#include <xen/interface/xen.h>
-#include <xen/interface/version.h>
-
-#include <asm/ptrace.h>
-#include <asm/page.h>
-#include <asm/desc.h>
-#if defined(__i386__)
-#  ifdef CONFIG_X86_PAE
-#   include <asm-generic/pgtable-nopud.h>
-#  else
-#   include <asm-generic/pgtable-nopmd.h>
-#  endif
-#endif
-#include <asm/xen/hypercall.h>
-
 /* arch/i386/kernel/setup.c */
 extern struct shared_info *HYPERVISOR_shared_info;
 extern struct start_info *xen_start_info;
 
-/* arch/i386/mach-xen/evtchn.c */
-/* Force a proper event-channel callback from Xen. */
-extern void force_evtchn_callback(void);
-
-/* Turn jiffies into Xen system time. */
-u64 jiffies_to_st(unsigned long jiffies);
-
-
-#define MULTI_UVMFLAGS_INDEX 3
-#define MULTI_UVMDOMID_INDEX 4
-
 enum xen_domain_type {
 	XEN_NATIVE,
 	XEN_PV_DOMAIN,
@@ -74,9 +45,15 @@ enum xen_domain_type {
 
 extern enum xen_domain_type xen_domain_type;
 
+#ifdef CONFIG_XEN
 #define xen_domain()		(xen_domain_type != XEN_NATIVE)
-#define xen_pv_domain()		(xen_domain_type == XEN_PV_DOMAIN)
+#else
+#define xen_domain()		(0)
+#endif
+
+#define xen_pv_domain()		(xen_domain() && xen_domain_type == XEN_PV_DOMAIN)
+#define xen_hvm_domain()	(xen_domain() && xen_domain_type == XEN_HVM_DOMAIN)
+
 #define xen_initial_domain()	(xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN)
-#define xen_hvm_domain()	(xen_domain_type == XEN_HVM_DOMAIN)
 
 #endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index bc628998a1b9..7ef617ef1df3 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -1,11 +1,16 @@
 #ifndef _ASM_X86_XEN_PAGE_H
 #define _ASM_X86_XEN_PAGE_H
 
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
 #include <linux/pfn.h>
 
 #include <asm/uaccess.h>
+#include <asm/page.h>
 #include <asm/pgtable.h>
 
+#include <xen/interface/xen.h>
 #include <xen/features.h>
 
 /* Xen machine address */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 86cd2f829683..bea215230b20 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -28,6 +28,7 @@
 #include <linux/console.h>
 
 #include <xen/interface/xen.h>
+#include <xen/interface/version.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/vcpu.h>
 #include <xen/features.h>
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 526c191e84ea..8dc7109d61b7 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -44,13 +44,15 @@
 #include <linux/list.h>
 #include <linux/sysdev.h>
 
-#include <asm/xen/hypervisor.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 #include <asm/tlb.h>
 
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+#include <xen/interface/xen.h>
 #include <xen/interface/memory.h>
 #include <xen/xenbus.h>
 #include <xen/features.h>
diff --git a/drivers/xen/features.c b/drivers/xen/features.c
index 0707714e40d6..99eda169c779 100644
--- a/drivers/xen/features.c
+++ b/drivers/xen/features.c
@@ -8,7 +8,11 @@
 #include <linux/types.h>
 #include <linux/cache.h>
 #include <linux/module.h>
-#include <asm/xen/hypervisor.h>
+
+#include <asm/xen/hypercall.h>
+
+#include <xen/interface/xen.h>
+#include <xen/interface/version.h>
 #include <xen/features.h>
 
 u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 06592b9da83c..7d8f531fb8e8 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -40,6 +40,7 @@
 #include <xen/interface/xen.h>
 #include <xen/page.h>
 #include <xen/grant_table.h>
+#include <asm/xen/hypercall.h>
 
 #include <asm/pgtable.h>
 #include <asm/sync_bitops.h>
diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h
index 919b5bdcb2bd..2090881c3650 100644
--- a/include/xen/interface/event_channel.h
+++ b/include/xen/interface/event_channel.h
@@ -9,6 +9,8 @@
 #ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__
 #define __XEN_PUBLIC_EVENT_CHANNEL_H__
 
+#include <xen/interface/xen.h>
+
 typedef uint32_t evtchn_port_t;
 DEFINE_GUEST_HANDLE(evtchn_port_t);
 
-- 
cgit v1.2.3-70-g09d2


From 8ae936690972dfcad73d0dde1095b9f32af5ee95 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Fri, 12 Dec 2008 15:52:26 -0800
Subject: x86: hardirq: use inc_irq_stat() in non-unified functions

Impact: cleanup

Replace incrementing irq stat with inc_irq_stat() in non-unified functions.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/mcheck/mce_amd_64.c   | 2 +-
 arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 2 +-
 arch/x86/kernel/time_32.c                 | 2 +-
 arch/x86/kernel/time_64.c                 | 2 +-
 arch/x86/kernel/tlb_32.c                  | 2 +-
 arch/x86/kernel/tlb_64.c                  | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 5eb390a4b2e9..748c8f9e7a05 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -237,7 +237,7 @@ asmlinkage void mce_threshold_interrupt(void)
 		}
 	}
 out:
-	add_pda(irq_threshold_count, 1);
+	inc_irq_stat(irq_threshold_count);
 	irq_exit();
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index c17eaf5dd6dd..4b48f251fd39 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -26,7 +26,7 @@ asmlinkage void smp_thermal_interrupt(void)
 	if (therm_throt_process(msr_val & 1))
 		mce_log_therm_throt_event(smp_processor_id(), msr_val);
 
-	add_pda(irq_thermal_count, 1);
+	inc_irq_stat(irq_thermal_count);
 	irq_exit();
 }
 
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 77b400f06ea2..65309e4cb1c0 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -75,7 +75,7 @@ EXPORT_SYMBOL(profile_pc);
 irqreturn_t timer_interrupt(int irq, void *dev_id)
 {
 	/* Keep nmi watchdog up to date */
-	per_cpu(irq_stat, smp_processor_id()).irq0_irqs++;
+	inc_irq_stat(irq0_irqs);
 
 #ifdef CONFIG_X86_IO_APIC
 	if (timer_ack) {
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 418a095c5796..1749cacde8b9 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -51,7 +51,7 @@ EXPORT_SYMBOL(profile_pc);
 
 irqreturn_t timer_interrupt(int irq, void *dev_id)
 {
-	add_pda(irq0_irqs, 1);
+	inc_irq_stat(irq0_irqs);
 
 	global_clock_event->event_handler(global_clock_event);
 
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index f4049f3513b6..f374f83fca42 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -119,7 +119,7 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
 	smp_mb__after_clear_bit();
 out:
 	put_cpu_no_resched();
-	__get_cpu_var(irq_stat).irq_tlb_count++;
+	inc_irq_stat(irq_tlb_count);
 }
 
 void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 8f919ca69494..29887d7081a9 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -154,7 +154,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
 out:
 	ack_APIC_irq();
 	cpu_clear(cpu, f->flush_cpumask);
-	add_pda(irq_tlb_count, 1);
+	inc_irq_stat(irq_tlb_count);
 }
 
 void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
-- 
cgit v1.2.3-70-g09d2


From fde9071167c4624281553b23232aa8b81e71c790 Mon Sep 17 00:00:00 2001
From: Zachary Amsden <zach@vmware.com>
Date: Fri, 12 Dec 2008 11:26:35 -0800
Subject: x86: clean up dead code in vmi_32.c

Impact: cleanup, remove dead debug code

I ran across some old debugging code in vmi paravirt-ops code that was
already dead, but still potentially useful.  After reviewing recent
changes to the way kernel page tables are allocated and initialized, and
the lack of bugs caught by this debugging code, I've concluded it is now
totally useless to have around, and it's already been #if 0'd for quite
some time.

There's no rush to get this in mainline, but it's also totally harmless,
so I'll let the x86 maintainers decide where it should be tucked.  I've
been out of the mainstream dev loop for a couple months, so apologies if
I haven't got any protocol changes in order.

Remove mummified remains found in vmi_32.c

Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/vmi_32.c | 119 -----------------------------------------------
 1 file changed, 119 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 8b6c393ab9fd..8087e0cd877d 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -266,109 +266,6 @@ static void vmi_nop(void)
 {
 }
 
-#ifdef CONFIG_DEBUG_PAGE_TYPE
-
-#ifdef CONFIG_X86_PAE
-#define MAX_BOOT_PTS (2048+4+1)
-#else
-#define MAX_BOOT_PTS (1024+1)
-#endif
-
-/*
- * During boot, mem_map is not yet available in paging_init, so stash
- * all the boot page allocations here.
- */
-static struct {
-	u32 pfn;
-	int type;
-} boot_page_allocations[MAX_BOOT_PTS];
-static int num_boot_page_allocations;
-static int boot_allocations_applied;
-
-void vmi_apply_boot_page_allocations(void)
-{
-	int i;
-	BUG_ON(!mem_map);
-	for (i = 0; i < num_boot_page_allocations; i++) {
-		struct page *page = pfn_to_page(boot_page_allocations[i].pfn);
-		page->type = boot_page_allocations[i].type;
-		page->type = boot_page_allocations[i].type &
-				~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE);
-	}
-	boot_allocations_applied = 1;
-}
-
-static void record_page_type(u32 pfn, int type)
-{
-	BUG_ON(num_boot_page_allocations >= MAX_BOOT_PTS);
-	boot_page_allocations[num_boot_page_allocations].pfn = pfn;
-	boot_page_allocations[num_boot_page_allocations].type = type;
-	num_boot_page_allocations++;
-}
-
-static void check_zeroed_page(u32 pfn, int type, struct page *page)
-{
-	u32 *ptr;
-	int i;
-	int limit = PAGE_SIZE / sizeof(int);
-
-	if (page_address(page))
-		ptr = (u32 *)page_address(page);
-	else
-		ptr = (u32 *)__va(pfn << PAGE_SHIFT);
-	/*
-	 * When cloning the root in non-PAE mode, only the userspace
-	 * pdes need to be zeroed.
-	 */
-	if (type & VMI_PAGE_CLONE)
-		limit = KERNEL_PGD_BOUNDARY;
-	for (i = 0; i < limit; i++)
-		BUG_ON(ptr[i]);
-}
-
-/*
- * We stash the page type into struct page so we can verify the page
- * types are used properly.
- */
-static void vmi_set_page_type(u32 pfn, int type)
-{
-	/* PAE can have multiple roots per page - don't track */
-	if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP))
-		return;
-
-	if (boot_allocations_applied) {
-		struct page *page = pfn_to_page(pfn);
-		if (type != VMI_PAGE_NORMAL)
-			BUG_ON(page->type);
-		else
-			BUG_ON(page->type == VMI_PAGE_NORMAL);
-		page->type = type & ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE);
-		if (type & VMI_PAGE_ZEROED)
-			check_zeroed_page(pfn, type, page);
-	} else {
-		record_page_type(pfn, type);
-	}
-}
-
-static void vmi_check_page_type(u32 pfn, int type)
-{
-	/* PAE can have multiple roots per page - skip checks */
-	if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP))
-		return;
-
-	type &= ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE);
-	if (boot_allocations_applied) {
-		struct page *page = pfn_to_page(pfn);
-		BUG_ON((page->type ^ type) & VMI_PAGE_PAE);
-		BUG_ON(type == VMI_PAGE_NORMAL && page->type);
-		BUG_ON((type & page->type) == 0);
-	}
-}
-#else
-#define vmi_set_page_type(p,t) do { } while (0)
-#define vmi_check_page_type(p,t) do { } while (0)
-#endif
-
 #ifdef CONFIG_HIGHPTE
 static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
 {
@@ -395,7 +292,6 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
 
 static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn)
 {
-	vmi_set_page_type(pfn, VMI_PAGE_L1);
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
 }
 
@@ -406,27 +302,22 @@ static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn)
 	 * It is called only for swapper_pg_dir, which already has
 	 * data on it.
 	 */
- 	vmi_set_page_type(pfn, VMI_PAGE_L2);
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
 }
 
 static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count)
 {
- 	vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
-	vmi_check_page_type(clonepfn, VMI_PAGE_L2);
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
 }
 
 static void vmi_release_pte(unsigned long pfn)
 {
 	vmi_ops.release_page(pfn, VMI_PAGE_L1);
-	vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
 }
 
 static void vmi_release_pmd(unsigned long pfn)
 {
 	vmi_ops.release_page(pfn, VMI_PAGE_L2);
-	vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
 }
 
 /*
@@ -450,26 +341,22 @@ static void vmi_release_pmd(unsigned long pfn)
 
 static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
 	vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
 }
 
 static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
 	vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0));
 }
 
 static void vmi_set_pte(pte_t *ptep, pte_t pte)
 {
 	/* XXX because of set_pmd_pte, this can be called on PT or PD layers */
-	vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE | VMI_PAGE_PD);
 	vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT);
 }
 
 static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
 {
-	vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
 	vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
 }
 
@@ -477,10 +364,8 @@ static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
 #ifdef CONFIG_X86_PAE
 	const pte_t pte = { .pte = pmdval.pmd };
-	vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD);
 #else
 	const pte_t pte = { pmdval.pud.pgd.pgd };
-	vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PGD);
 #endif
 	vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD);
 }
@@ -502,7 +387,6 @@ static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval)
 
 static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
 {
-	vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
 	vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1));
 }
 
@@ -510,21 +394,18 @@ static void vmi_set_pud(pud_t *pudp, pud_t pudval)
 {
 	/* Um, eww */
 	const pte_t pte = { .pte = pudval.pgd.pgd };
-	vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD);
 	vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP);
 }
 
 static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	const pte_t pte = { .pte = 0 };
-	vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
 	vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
 }
 
 static void vmi_pmd_clear(pmd_t *pmd)
 {
 	const pte_t pte = { .pte = 0 };
-	vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD);
 	vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD);
 }
 #endif
-- 
cgit v1.2.3-70-g09d2


From 189f67c4408806563a1f061f5c8bf184a6658477 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Fri, 12 Dec 2008 14:50:40 -0600
Subject: x86: UV fix for global physical addresses

Impact: fix UV boot crash

This fixes a UV bug related to generating global memory addresses
on partitioned systems. Partition systems do not have physical memory
at address 0. Instead, a chunk of high memory is remapped by the chipset
so that it appears to be at address 0. This remapping is INVISIBLE to most
of the OS. The only OS functions that need to be aware of the remaping are
functions that directly interface to the chipset. The GRU is one example.

Also, delete a couple of unused macros related to global memory addresses.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/uv/uv_hub.h | 16 ++--------------
 arch/x86/kernel/genx2apic_uv_x.c |  3 +--
 2 files changed, 3 insertions(+), 16 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 52aa943c634f..777327ef05c1 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -210,7 +210,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
 static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr)
 {
 	if (paddr < uv_hub_info->lowmem_remap_top)
-		paddr += uv_hub_info->lowmem_remap_base;
+		paddr |= uv_hub_info->lowmem_remap_base;
 	return paddr | uv_hub_info->gnode_upper;
 }
 
@@ -218,19 +218,7 @@ static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr)
 /* socket virtual --> UV global physical address */
 static inline unsigned long uv_gpa(void *v)
 {
-	return __pa(v) | uv_hub_info->gnode_upper;
-}
-
-/* socket virtual --> UV global physical address */
-static inline void *uv_vgpa(void *v)
-{
-	return (void *)uv_gpa(v);
-}
-
-/* UV global physical address --> socket virtual */
-static inline void *uv_va(unsigned long gpa)
-{
-	return __va(gpa & uv_hub_info->gpa_mask);
+	return uv_soc_phys_ram_to_gpa(__pa(v));
 }
 
 /* pnode, offset --> socket virtual */
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 221299f4509f..dece17289731 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -540,8 +540,7 @@ void __init uv_system_init(void)
 		uv_blade_info[blade].nr_possible_cpus++;
 
 		uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
-		uv_cpu_hub_info(cpu)->lowmem_remap_top =
-					lowmem_redir_base + lowmem_redir_size;
+		uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
 		uv_cpu_hub_info(cpu)->m_val = m_val;
 		uv_cpu_hub_info(cpu)->n_val = m_val;
 		uv_cpu_hub_info(cpu)->numa_blade_id = blade;
-- 
cgit v1.2.3-70-g09d2


From c8182f0016fb65a721c4fbe487909a2d56178135 Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@sgi.com>
Date: Fri, 12 Dec 2008 11:07:00 -0600
Subject: sgi-xp: xpc needs to pass the physical address, not virtual

Impact: fix crash

xpc needs to pass the physical address, not virtual.

Testing uncovered this problem.  The virtual address happens to work
most of the time due to the way bios was masking off the node bits.
Passing the physical address makes it work all of the time.

Signed-off-by: Russ Anderson <rja@sgi.com>
Acked-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/uv/bios.h | 2 +-
 arch/x86/kernel/bios_uv.c      | 4 +---
 drivers/misc/sgi-xp/xpc_uv.c   | 8 ++++----
 3 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index da1c4e8e78fc..7ed17ff502b9 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -100,7 +100,7 @@ extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
 
 extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *);
 extern s64 uv_bios_freq_base(u64, u64 *);
-extern int uv_bios_mq_watchlist_alloc(int, void *, unsigned int,
+extern int uv_bios_mq_watchlist_alloc(int, unsigned long, unsigned int,
 					unsigned long *);
 extern int uv_bios_mq_watchlist_free(int, int);
 extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect);
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index d22d0f1bbea0..2a0a2a3cac26 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -101,15 +101,13 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
 }
 
 int
-uv_bios_mq_watchlist_alloc(int blade, void *mq, unsigned int mq_size,
+uv_bios_mq_watchlist_alloc(int blade, unsigned long addr, unsigned int mq_size,
 			   unsigned long *intr_mmr_offset)
 {
 	union uv_watchlist_u size_blade;
-	unsigned long addr;
 	u64 watchlist;
 	s64 ret;
 
-	addr = (unsigned long)mq;
 	size_blade.size = mq_size;
 	size_blade.blade = blade;
 
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index 684b2dd17583..91a55b1b1037 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -119,16 +119,16 @@ xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq)
 	int ret;
 
 #if defined CONFIG_X86_64
-	ret = uv_bios_mq_watchlist_alloc(mq->mmr_blade, mq->address, mq->order,
-					 &mq->mmr_offset);
+	ret = uv_bios_mq_watchlist_alloc(mq->mmr_blade, uv_gpa(mq->address),
+					 mq->order, &mq->mmr_offset);
 	if (ret < 0) {
 		dev_err(xpc_part, "uv_bios_mq_watchlist_alloc() failed, "
 			"ret=%d\n", ret);
 		return ret;
 	}
 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
-	ret = sn_mq_watchlist_alloc(mq->mmr_blade, mq->address, mq->order,
-				    &mq->mmr_offset);
+	ret = sn_mq_watchlist_alloc(mq->mmr_blade, uv_gpa(mq->address),
+				    mq->order, &mq->mmr_offset);
 	if (ret < 0) {
 		dev_err(xpc_part, "sn_mq_watchlist_alloc() failed, ret=%d\n",
 			ret);
-- 
cgit v1.2.3-70-g09d2


From ae417bb487e3bb88dc862b83b4bf00d87ba67ec8 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Tue, 16 Dec 2008 14:02:16 -0800
Subject: x86: signal: use signal_fault() in sys_sigreturn()

Impact: cleanup

Call signal_fault() in error route of sys_sigreturn().
Change log level to KERN_EMERG if current is init.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b1cc6da64208..2725a294d734 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -594,17 +594,7 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
 	return ax;
 
 badframe:
-	if (show_unhandled_signals && printk_ratelimit()) {
-		printk("%s%s[%d] bad frame in sigreturn frame:"
-			"%p ip:%lx sp:%lx oeax:%lx",
-		    task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
-		    current->comm, task_pid_nr(current), frame, regs->ip,
-		    regs->sp, regs->orig_ax);
-		print_vma_addr(" in ", regs->ip);
-		printk(KERN_CONT "\n");
-	}
-
-	force_sig(SIGSEGV, current);
+	signal_fault(regs, frame, "sigreturn");
 
 	return 0;
 }
@@ -901,8 +891,9 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
 	struct task_struct *me = current;
 
 	if (show_unhandled_signals && printk_ratelimit()) {
-		printk(KERN_INFO
+		printk("%s"
 		       "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
+		       task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
 		       me->comm, me->pid, where, frame,
 		       regs->ip, regs->sp, regs->orig_ax);
 		print_vma_addr(" in ", regs->ip);
-- 
cgit v1.2.3-70-g09d2


From d0b48ca189523b638d8674fa41e94d1950a17038 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Tue, 16 Dec 2008 14:03:36 -0800
Subject: x86: ia32_signal: use __put_user() instead of __copy_to_user()

Impact: cleanup

__put_user() can be used for constant size 8, like arch/x86/kernel/signal.c.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 1267977e7708..e4f2a5045743 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -467,7 +467,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 	 * These are actually not used anymore, but left because some
 	 * gdb versions depend on them as a marker.
 	 */
-	err |= __copy_to_user(frame->retcode, &code, 8);
+	err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode);
 	if (err)
 		return -EFAULT;
 
@@ -554,7 +554,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	 * Not actually used anymore, but left because some gdb
 	 * versions need it.
 	 */
-	err |= __copy_to_user(frame->retcode, &code, 8);
+	err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode);
 	if (err)
 		return -EFAULT;
 
-- 
cgit v1.2.3-70-g09d2


From 8bee3f0a662ad9c3d6bb705b0530a3b90f089c55 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Tue, 16 Dec 2008 14:04:43 -0800
Subject: x86: ia32_signal: use proper macro __USER32_DS

Impact: cleanup

Use __USER32_DS instead of __USER_DS in ia32_signal.c.
No impact, because __USER32_DS is defined __USER_DS.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index e4f2a5045743..9c99c429a20d 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -396,7 +396,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
 	}
 
 	/* This is the legacy signal stack switching. */
-	else if ((regs->ss & 0xffff) != __USER_DS &&
+	else if ((regs->ss & 0xffff) != __USER32_DS &&
 		!(ka->sa.sa_flags & SA_RESTORER) &&
 		 ka->sa.sa_restorer)
 		sp = (unsigned long) ka->sa.sa_restorer;
-- 
cgit v1.2.3-70-g09d2


From f5223763a664da16771211f9d293e18cb242b246 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 17 Dec 2008 18:47:17 -0800
Subject: x86: signal: move ia32 func declarations into
 arch/x86/kernel/signal.c

Impact: cleanup

Move declarations of ia32_setup_rt_frame() and ia32_setup_frame() into
arch/x86/kernel/signal.c.

This is for future use of sigframe.h.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/sigframe.h | 5 -----
 arch/x86/kernel/signal.c   | 5 +++++
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h
index cc673aa55ce4..6dd7e2b70a4b 100644
--- a/arch/x86/kernel/sigframe.h
+++ b/arch/x86/kernel/sigframe.h
@@ -34,9 +34,4 @@ struct rt_sigframe {
 	struct siginfo info;
 	/* fp state follows here */
 };
-
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-		sigset_t *set, struct pt_regs *regs);
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
-		sigset_t *set, struct pt_regs *regs);
 #endif
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 2725a294d734..848c2d64a289 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -671,6 +671,11 @@ static int signr_convert(int sig)
 #define is_ia32	0
 #endif /* CONFIG_IA32_EMULATION */
 
+int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+		sigset_t *set, struct pt_regs *regs);
+int ia32_setup_frame(int sig, struct k_sigaction *ka,
+		sigset_t *set, struct pt_regs *regs);
+
 #endif /* CONFIG_X86_32 */
 
 static int
-- 
cgit v1.2.3-70-g09d2


From a5c56eb36f999ae0ecac278e51fd1cf8feb16c2f Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 17 Dec 2008 18:49:55 -0800
Subject: x86: signal: rename sigframe and rt_sigframe on 32-bit

Impact: cleanup, prepare to move sigframe.h

On 32-bit, rename struct sigrame to struct sigframe_ia32, struct rt_sigframe
to struct rt_sigframe_ia32 and several structures.

And add helper macros to access the above data in arch/x86/kernel/signal.c.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/sigframe.h | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h
index 6dd7e2b70a4b..6718ed04b05b 100644
--- a/arch/x86/kernel/sigframe.h
+++ b/arch/x86/kernel/sigframe.h
@@ -1,8 +1,14 @@
 #ifdef CONFIG_X86_32
-struct sigframe {
-	char __user *pretcode;
+#define sigframe_ia32		sigframe
+#define rt_sigframe_ia32	rt_sigframe
+#define sigcontext_ia32		sigcontext
+#define _fpstate_ia32		_fpstate
+#define ucontext_ia32		ucontext
+
+struct sigframe_ia32 {
+	u32 pretcode;
 	int sig;
-	struct sigcontext sc;
+	struct sigcontext_ia32 sc;
 	/*
 	 * fpstate is unused. fpstate is moved/allocated after
 	 * retcode[] below. This movement allows to have the FP state and the
@@ -11,27 +17,27 @@ struct sigframe {
 	 * the offset of extramask[] in the sigframe and thus prevent any
 	 * legacy application accessing/modifying it.
 	 */
-	struct _fpstate fpstate_unused;
+	struct _fpstate_ia32 fpstate_unused;
 	unsigned long extramask[_NSIG_WORDS-1];
 	char retcode[8];
 	/* fp state follows here */
 };
 
-struct rt_sigframe {
-	char __user *pretcode;
+struct rt_sigframe_ia32 {
+	u32 pretcode;
 	int sig;
-	struct siginfo __user *pinfo;
-	void __user *puc;
+	u32 pinfo;
+	u32 puc;
 	struct siginfo info;
-	struct ucontext uc;
+	struct ucontext_ia32 uc;
 	char retcode[8];
 	/* fp state follows here */
 };
-#else
+#else /* !CONFIG_X86_32 */
 struct rt_sigframe {
 	char __user *pretcode;
 	struct ucontext uc;
 	struct siginfo info;
 	/* fp state follows here */
 };
-#endif
+#endif /* CONFIG_X86_32 */
-- 
cgit v1.2.3-70-g09d2


From 41af86fad3c40646b9748279e3862781e937a5d2 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 17 Dec 2008 18:50:32 -0800
Subject: x86: signal: move sigframe.h to arch/x86/include/asm

Impact: cleanup, move header file

Move arch/x86/kernel/sigframe.h to arch/x86/include/asm/sigframe.h.
It will be used in arch/x86/ia32/ia32_signal.c.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/sigframe.h  | 43 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/asm-offsets_32.c |  2 +-
 arch/x86/kernel/sigframe.h       | 43 ----------------------------------------
 arch/x86/kernel/signal.c         |  2 +-
 4 files changed, 45 insertions(+), 45 deletions(-)
 create mode 100644 arch/x86/include/asm/sigframe.h
 delete mode 100644 arch/x86/kernel/sigframe.h

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h
new file mode 100644
index 000000000000..6718ed04b05b
--- /dev/null
+++ b/arch/x86/include/asm/sigframe.h
@@ -0,0 +1,43 @@
+#ifdef CONFIG_X86_32
+#define sigframe_ia32		sigframe
+#define rt_sigframe_ia32	rt_sigframe
+#define sigcontext_ia32		sigcontext
+#define _fpstate_ia32		_fpstate
+#define ucontext_ia32		ucontext
+
+struct sigframe_ia32 {
+	u32 pretcode;
+	int sig;
+	struct sigcontext_ia32 sc;
+	/*
+	 * fpstate is unused. fpstate is moved/allocated after
+	 * retcode[] below. This movement allows to have the FP state and the
+	 * future state extensions (xsave) stay together.
+	 * And at the same time retaining the unused fpstate, prevents changing
+	 * the offset of extramask[] in the sigframe and thus prevent any
+	 * legacy application accessing/modifying it.
+	 */
+	struct _fpstate_ia32 fpstate_unused;
+	unsigned long extramask[_NSIG_WORDS-1];
+	char retcode[8];
+	/* fp state follows here */
+};
+
+struct rt_sigframe_ia32 {
+	u32 pretcode;
+	int sig;
+	u32 pinfo;
+	u32 puc;
+	struct siginfo info;
+	struct ucontext_ia32 uc;
+	char retcode[8];
+	/* fp state follows here */
+};
+#else /* !CONFIG_X86_32 */
+struct rt_sigframe {
+	char __user *pretcode;
+	struct ucontext uc;
+	struct siginfo info;
+	/* fp state follows here */
+};
+#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 6649d09ad88f..ee4df08feee6 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -11,7 +11,7 @@
 #include <linux/suspend.h>
 #include <linux/kbuild.h>
 #include <asm/ucontext.h>
-#include "sigframe.h"
+#include <asm/sigframe.h>
 #include <asm/pgtable.h>
 #include <asm/fixmap.h>
 #include <asm/processor.h>
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h
deleted file mode 100644
index 6718ed04b05b..000000000000
--- a/arch/x86/kernel/sigframe.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifdef CONFIG_X86_32
-#define sigframe_ia32		sigframe
-#define rt_sigframe_ia32	rt_sigframe
-#define sigcontext_ia32		sigcontext
-#define _fpstate_ia32		_fpstate
-#define ucontext_ia32		ucontext
-
-struct sigframe_ia32 {
-	u32 pretcode;
-	int sig;
-	struct sigcontext_ia32 sc;
-	/*
-	 * fpstate is unused. fpstate is moved/allocated after
-	 * retcode[] below. This movement allows to have the FP state and the
-	 * future state extensions (xsave) stay together.
-	 * And at the same time retaining the unused fpstate, prevents changing
-	 * the offset of extramask[] in the sigframe and thus prevent any
-	 * legacy application accessing/modifying it.
-	 */
-	struct _fpstate_ia32 fpstate_unused;
-	unsigned long extramask[_NSIG_WORDS-1];
-	char retcode[8];
-	/* fp state follows here */
-};
-
-struct rt_sigframe_ia32 {
-	u32 pretcode;
-	int sig;
-	u32 pinfo;
-	u32 puc;
-	struct siginfo info;
-	struct ucontext_ia32 uc;
-	char retcode[8];
-	/* fp state follows here */
-};
-#else /* !CONFIG_X86_32 */
-struct rt_sigframe {
-	char __user *pretcode;
-	struct ucontext uc;
-	struct siginfo info;
-	/* fp state follows here */
-};
-#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 848c2d64a289..89bb7668041d 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -35,7 +35,7 @@
 #include <asm/syscall.h>
 #include <asm/syscalls.h>
 
-#include "sigframe.h"
+#include <asm/sigframe.h>
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
-- 
cgit v1.2.3-70-g09d2


From c85c2ff877c9305f801f7d5b9e6382cb05a03d45 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 17 Dec 2008 18:51:08 -0800
Subject: x86: signal: prepare to include from ia32_signal.c

Impact: cleanup, prepare to use from ia32_signal.c

Make struct sigframe_ia32 and rt_sigframe_ia32 visible to ia32_signal.c.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/sigframe.h | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h
index 6718ed04b05b..491a0878c3aa 100644
--- a/arch/x86/include/asm/sigframe.h
+++ b/arch/x86/include/asm/sigframe.h
@@ -4,7 +4,15 @@
 #define sigcontext_ia32		sigcontext
 #define _fpstate_ia32		_fpstate
 #define ucontext_ia32		ucontext
+#else /* !CONFIG_X86_32 */
+
+#ifdef CONFIG_IA32_EMULATION
+#include <asm/ia32.h>
+#endif /* CONFIG_IA32_EMULATION */
+
+#endif /* CONFIG_X86_32 */
 
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 struct sigframe_ia32 {
 	u32 pretcode;
 	int sig;
@@ -18,7 +26,11 @@ struct sigframe_ia32 {
 	 * legacy application accessing/modifying it.
 	 */
 	struct _fpstate_ia32 fpstate_unused;
+#ifdef CONFIG_IA32_EMULATION
+	unsigned int extramask[_COMPAT_NSIG_WORDS-1];
+#else /* !CONFIG_IA32_EMULATION */
 	unsigned long extramask[_NSIG_WORDS-1];
+#endif /* CONFIG_IA32_EMULATION */
 	char retcode[8];
 	/* fp state follows here */
 };
@@ -28,16 +40,22 @@ struct rt_sigframe_ia32 {
 	int sig;
 	u32 pinfo;
 	u32 puc;
+#ifdef CONFIG_IA32_EMULATION
+	compat_siginfo_t info;
+#else /* !CONFIG_IA32_EMULATION */
 	struct siginfo info;
+#endif /* CONFIG_IA32_EMULATION */
 	struct ucontext_ia32 uc;
 	char retcode[8];
 	/* fp state follows here */
 };
-#else /* !CONFIG_X86_32 */
+#endif /* defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) */
+
+#ifdef CONFIG_X86_64
 struct rt_sigframe {
 	char __user *pretcode;
 	struct ucontext uc;
 	struct siginfo info;
 	/* fp state follows here */
 };
-#endif /* CONFIG_X86_32 */
+#endif /* CONFIG_X86_64 */
-- 
cgit v1.2.3-70-g09d2


From 3b0d29ee1c73b6b90bfddd10f7b8e86632b6b694 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 17 Dec 2008 18:51:46 -0800
Subject: x86: ia32_signal: rename struct sigframe and rt_sigframe

Impact: cleanup, prepare to include sigframe.h

Rename struct sigframe to struct sigframe_ia32 and struct rt_sigframe to
struct rt_sigframe_ia32.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 9c99c429a20d..334a4aa2e75b 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -174,7 +174,7 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
  * Do a signal return; undo the signal stack.
  */
 
-struct sigframe
+struct sigframe_ia32
 {
 	u32 pretcode;
 	int sig;
@@ -185,7 +185,7 @@ struct sigframe
 	/* fp state follows here */
 };
 
-struct rt_sigframe
+struct rt_sigframe_ia32
 {
 	u32 pretcode;
 	int sig;
@@ -271,7 +271,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 
 asmlinkage long sys32_sigreturn(struct pt_regs *regs)
 {
-	struct sigframe __user *frame = (struct sigframe __user *)(regs->sp-8);
+	struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
 	sigset_t set;
 	unsigned int ax;
 
@@ -301,12 +301,12 @@ badframe:
 
 asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
 {
-	struct rt_sigframe __user *frame;
+	struct rt_sigframe_ia32 __user *frame;
 	sigset_t set;
 	unsigned int ax;
 	struct pt_regs tregs;
 
-	frame = (struct rt_sigframe __user *)(regs->sp - 4);
+	frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
 
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
@@ -418,7 +418,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
 int ia32_setup_frame(int sig, struct k_sigaction *ka,
 		     compat_sigset_t *set, struct pt_regs *regs)
 {
-	struct sigframe __user *frame;
+	struct sigframe_ia32 __user *frame;
 	void __user *restorer;
 	int err = 0;
 	void __user *fpstate = NULL;
@@ -497,7 +497,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 			compat_sigset_t *set, struct pt_regs *regs)
 {
-	struct rt_sigframe __user *frame;
+	struct rt_sigframe_ia32 __user *frame;
 	void __user *restorer;
 	int err = 0;
 	void __user *fpstate = NULL;
-- 
cgit v1.2.3-70-g09d2


From d98f9d84422c393103dc7569dc8444bac628f7ac Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 17 Dec 2008 18:52:45 -0800
Subject: x86: ia32_signal: use sigframe.h

Impact: cleanup

Use arch/x86/include/asm/sigframe.h instead of defining redundant structures.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 26 ++------------------------
 1 file changed, 2 insertions(+), 24 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 334a4aa2e75b..3b3878a63bc2 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -32,6 +32,8 @@
 #include <asm/proto.h>
 #include <asm/vdso.h>
 
+#include <asm/sigframe.h>
+
 #define DEBUG_SIG 0
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -173,30 +175,6 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
 /*
  * Do a signal return; undo the signal stack.
  */
-
-struct sigframe_ia32
-{
-	u32 pretcode;
-	int sig;
-	struct sigcontext_ia32 sc;
-	struct _fpstate_ia32 fpstate_unused; /* look at kernel/sigframe.h */
-	unsigned int extramask[_COMPAT_NSIG_WORDS-1];
-	char retcode[8];
-	/* fp state follows here */
-};
-
-struct rt_sigframe_ia32
-{
-	u32 pretcode;
-	int sig;
-	u32 pinfo;
-	u32 puc;
-	compat_siginfo_t info;
-	struct ucontext_ia32 uc;
-	char retcode[8];
-	/* fp state follows here */
-};
-
 #define COPY(x)			{		\
 	err |= __get_user(regs->x, &sc->x);	\
 }
-- 
cgit v1.2.3-70-g09d2


From 57a37505d19f4dfeee26f0fd7ea38ed6f1d10cbe Mon Sep 17 00:00:00 2001
From: Jaswinder Singh <jaswinder@infradead.org>
Date: Wed, 17 Dec 2008 23:17:21 +0530
Subject: x86: time_64.c timer_interrupt() should be static

Impact: cleanup, reduce kernel size a bit

Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/time_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index cb19d650c216..083a4a5bb00f 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -49,7 +49,7 @@ unsigned long profile_pc(struct pt_regs *regs)
 }
 EXPORT_SYMBOL(profile_pc);
 
-irqreturn_t timer_interrupt(int irq, void *dev_id)
+static irqreturn_t timer_interrupt(int irq, void *dev_id)
 {
 	add_pda(irq0_irqs, 1);
 
-- 
cgit v1.2.3-70-g09d2


From 7c9c160c54fc545efc23881344593868e5f717bd Mon Sep 17 00:00:00 2001
From: Jaswinder Singh <jaswinder@infradead.org>
Date: Wed, 17 Dec 2008 23:18:52 +0530
Subject: x86: tls.c declare sys_set_thread_area and sys_get_thread_area before
 they get used

Impact: cleanup

In asm/syscalls.h move out sys_set_thread_area() and sys_get_thread_area()
as they are common for both 32 and 64 bit.

Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/syscalls.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 75d4a6afc36f..c0b0bda754ee 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -22,6 +22,10 @@ asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
 /* kernel/ldt.c */
 asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
 
+/* kernel/tls.c */
+asmlinkage int sys_set_thread_area(struct user_desc __user *);
+asmlinkage int sys_get_thread_area(struct user_desc __user *);
+
 /* X86_32 only */
 #ifdef CONFIG_X86_32
 /* kernel/process_32.c */
@@ -54,10 +58,6 @@ asmlinkage int sys_uname(struct old_utsname __user *);
 struct oldold_utsname;
 asmlinkage int sys_olduname(struct oldold_utsname __user *);
 
-/* kernel/tls.c */
-asmlinkage int sys_set_thread_area(struct user_desc __user *);
-asmlinkage int sys_get_thread_area(struct user_desc __user *);
-
 /* kernel/vm86_32.c */
 asmlinkage int sys_vm86old(struct pt_regs);
 asmlinkage int sys_vm86(struct pt_regs);
-- 
cgit v1.2.3-70-g09d2


From f269b07e862c395d6981ab2c05d6bc34b0249e90 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Thu, 18 Dec 2008 18:35:06 +0100
Subject: x86: revert CONFIG_RELOCATABLE=y defconfig change

This commit:

commit 5cb04df8d3f03e37a19f2502591a84156be71772
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sun May 4 19:49:04 2008 +0200

   x86: defconfig updates

changed CONFIG_RELOCATABLE from n to y, which may lead to a mismatch
between the vmlinux debug information and the runtime location of the
kernel, even when the bootloader does not relocate the kernel.

Revert the specific change. Works for me with GRUB and qemu.

Reference: http://lkml.org/lkml/2008/11/25/243

Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/configs/i386_defconfig   | 2 +-
 arch/x86/configs/x86_64_defconfig | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 71fc39c70782..b30a08ed8eb4 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -298,7 +298,7 @@ CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
 # CONFIG_KEXEC_JUMP is not set
 CONFIG_PHYSICAL_START=0x1000000
-CONFIG_RELOCATABLE=y
+# CONFIG_RELOCATABLE is not set
 CONFIG_PHYSICAL_ALIGN=0x200000
 CONFIG_HOTPLUG_CPU=y
 # CONFIG_COMPAT_VDSO is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index b38bbabc1706..0e7dbc0a3e46 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -298,7 +298,7 @@ CONFIG_SCHED_HRTICK=y
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
 CONFIG_PHYSICAL_START=0x1000000
-CONFIG_RELOCATABLE=y
+# CONFIG_RELOCATABLE is not set
 CONFIG_PHYSICAL_ALIGN=0x200000
 CONFIG_HOTPLUG_CPU=y
 # CONFIG_COMPAT_VDSO is not set
-- 
cgit v1.2.3-70-g09d2


From 5c2628e8b4f670d0954053444289e2b018be957a Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Thu, 18 Dec 2008 09:18:35 -0800
Subject: x86: sigframe.h: add guard macro

Impact: cleanup

Add missing guard macro _ASM_X86_SIGFRAME_H.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/sigframe.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h
index 491a0878c3aa..3bd0f4276000 100644
--- a/arch/x86/include/asm/sigframe.h
+++ b/arch/x86/include/asm/sigframe.h
@@ -1,3 +1,6 @@
+#ifndef _ASM_X86_SIGFRAME_H
+#define _ASM_X86_SIGFRAME_H
+
 #ifdef CONFIG_X86_32
 #define sigframe_ia32		sigframe
 #define rt_sigframe_ia32	rt_sigframe
@@ -59,3 +62,5 @@ struct rt_sigframe {
 	/* fp state follows here */
 };
 #endif /* CONFIG_X86_64 */
+
+#endif /* _ASM_X86_SIGFRAME_H */
-- 
cgit v1.2.3-70-g09d2


From f0bc2202e0373eb8e9b1ddbec930e2e681357db8 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh <jaswinder@infradead.org>
Date: Wed, 17 Dec 2008 23:20:05 +0530
Subject: x86: process.c declare c1e_remove_cpu before they get used

Impact: cleanup, avoid sparse warning

Included asm/idle.h for c1e_remove_cpu() declaration. Fixes this
sparse warning:

  CHECK   arch/x86/kernel/process.c
  arch/x86/kernel/process.c:284:6: warning: symbol 'c1e_remove_cpu' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/process.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c622772744d8..b06100f1d612 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -1,6 +1,7 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <asm/idle.h>
 #include <linux/smp.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
-- 
cgit v1.2.3-70-g09d2


From 5899329b19100c0b82dc78e9b21ed8b920c9ffb3 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Thu, 18 Dec 2008 11:41:30 -0800
Subject: x86: PAT: implement track/untrack of pfnmap regions for x86 - v3

Impact: New mm functionality.

Hookup remap_pfn_range and vm_insert_pfn and corresponding copy and free
routines with reserve and free tracking.

reserve and free here only takes care of non RAM region mapping. For RAM
region, driver should use set_memory_[uc|wc|wb] to set the cache type and
then setup the mapping for user pte. We can bypass below
reserve/free in that case.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/pgtable.h |  10 ++
 arch/x86/mm/pat.c              | 236 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 246 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index c012f3b11671..7dcd94c29044 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -219,6 +219,11 @@ static inline unsigned long pte_pfn(pte_t pte)
 	return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
 }
 
+static inline u64 pte_pa(pte_t pte)
+{
+	return pte_val(pte) & PTE_PFN_MASK;
+}
+
 #define pte_page(pte)	pfn_to_page(pte_pfn(pte))
 
 static inline int pmd_large(pmd_t pte)
@@ -328,6 +333,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 
 #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
 
+/* Indicate that x86 has its own track and untrack pfn vma functions */
+#define track_pfn_vma_new track_pfn_vma_new
+#define track_pfn_vma_copy track_pfn_vma_copy
+#define untrack_pfn_vma untrack_pfn_vma
+
 #ifndef __ASSEMBLY__
 #define __HAVE_PHYS_MEM_ACCESS_PROT
 struct file;
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index eb1bf000d12e..1069ffecf77d 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -596,6 +596,242 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
 	free_memtype(addr, addr + size);
 }
 
+/*
+ * Internal interface to reserve a range of physical memory with prot.
+ * Reserved non RAM regions only and after successful reserve_memtype,
+ * this func also keeps identity mapping (if any) in sync with this new prot.
+ */
+static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot)
+{
+	int is_ram = 0;
+	int id_sz, ret;
+	unsigned long flags;
+	unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
+
+	is_ram = pagerange_is_ram(paddr, paddr + size);
+
+	if (is_ram != 0) {
+		/*
+		 * For mapping RAM pages, drivers need to call
+		 * set_memory_[uc|wc|wb] directly, for reserve and free, before
+		 * setting up the PTE.
+		 */
+		WARN_ON_ONCE(1);
+		return 0;
+	}
+
+	ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
+	if (ret)
+		return ret;
+
+	if (flags != want_flags) {
+		free_memtype(paddr, paddr + size);
+		printk(KERN_ERR
+		"%s:%d map pfn expected mapping type %s for %Lx-%Lx, got %s\n",
+			current->comm, current->pid,
+			cattr_name(want_flags),
+			(unsigned long long)paddr,
+			(unsigned long long)(paddr + size),
+			cattr_name(flags));
+		return -EINVAL;
+	}
+
+	/* Need to keep identity mapping in sync */
+	if (paddr >= __pa(high_memory))
+		return 0;
+
+	id_sz = (__pa(high_memory) < paddr + size) ?
+				__pa(high_memory) - paddr :
+				size;
+
+	if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) {
+		free_memtype(paddr, paddr + size);
+		printk(KERN_ERR
+			"%s:%d reserve_pfn_range ioremap_change_attr failed %s "
+			"for %Lx-%Lx\n",
+			current->comm, current->pid,
+			cattr_name(flags),
+			(unsigned long long)paddr,
+			(unsigned long long)(paddr + size));
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Internal interface to free a range of physical memory.
+ * Frees non RAM regions only.
+ */
+static void free_pfn_range(u64 paddr, unsigned long size)
+{
+	int is_ram;
+
+	is_ram = pagerange_is_ram(paddr, paddr + size);
+	if (is_ram == 0)
+		free_memtype(paddr, paddr + size);
+}
+
+/*
+ * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
+ * copied through copy_page_range().
+ *
+ * If the vma has a linear pfn mapping for the entire range, we get the prot
+ * from pte and reserve the entire vma range with single reserve_pfn_range call.
+ * Otherwise, we reserve the entire vma range, my ging through the PTEs page
+ * by page to get physical address and protection.
+ */
+int track_pfn_vma_copy(struct vm_area_struct *vma)
+{
+	int retval = 0;
+	unsigned long i, j;
+	u64 paddr;
+	pgprot_t prot;
+	pte_t pte;
+	unsigned long vma_start = vma->vm_start;
+	unsigned long vma_end = vma->vm_end;
+	unsigned long vma_size = vma_end - vma_start;
+
+	if (!pat_enabled)
+		return 0;
+
+	if (is_linear_pfn_mapping(vma)) {
+		/*
+		 * reserve the whole chunk starting from vm_pgoff,
+		 * But, we have to get the protection from pte.
+		 */
+		if (follow_pfnmap_pte(vma, vma_start, &pte)) {
+			WARN_ON_ONCE(1);
+			return -1;
+		}
+		prot = pte_pgprot(pte);
+		paddr = (u64)vma->vm_pgoff << PAGE_SHIFT;
+		return reserve_pfn_range(paddr, vma_size, prot);
+	}
+
+	/* reserve entire vma page by page, using pfn and prot from pte */
+	for (i = 0; i < vma_size; i += PAGE_SIZE) {
+		if (follow_pfnmap_pte(vma, vma_start + i, &pte))
+			continue;
+
+		paddr = pte_pa(pte);
+		prot = pte_pgprot(pte);
+		retval = reserve_pfn_range(paddr, PAGE_SIZE, prot);
+		if (retval)
+			goto cleanup_ret;
+	}
+	return 0;
+
+cleanup_ret:
+	/* Reserve error: Cleanup partial reservation and return error */
+	for (j = 0; j < i; j += PAGE_SIZE) {
+		if (follow_pfnmap_pte(vma, vma_start + j, &pte))
+			continue;
+
+		paddr = pte_pa(pte);
+		free_pfn_range(paddr, PAGE_SIZE);
+	}
+
+	return retval;
+}
+
+/*
+ * track_pfn_vma_new is called when a _new_ pfn mapping is being established
+ * for physical range indicated by pfn and size.
+ *
+ * prot is passed in as a parameter for the new mapping. If the vma has a
+ * linear pfn mapping for the entire range reserve the entire vma range with
+ * single reserve_pfn_range call.
+ * Otherwise, we look t the pfn and size and reserve only the specified range
+ * page by page.
+ *
+ * Note that this function can be called with caller trying to map only a
+ * subrange/page inside the vma.
+ */
+int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
+			unsigned long pfn, unsigned long size)
+{
+	int retval = 0;
+	unsigned long i, j;
+	u64 base_paddr;
+	u64 paddr;
+	unsigned long vma_start = vma->vm_start;
+	unsigned long vma_end = vma->vm_end;
+	unsigned long vma_size = vma_end - vma_start;
+
+	if (!pat_enabled)
+		return 0;
+
+	if (is_linear_pfn_mapping(vma)) {
+		/* reserve the whole chunk starting from vm_pgoff */
+		paddr = (u64)vma->vm_pgoff << PAGE_SHIFT;
+		return reserve_pfn_range(paddr, vma_size, prot);
+	}
+
+	/* reserve page by page using pfn and size */
+	base_paddr = (u64)pfn << PAGE_SHIFT;
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		paddr = base_paddr + i;
+		retval = reserve_pfn_range(paddr, PAGE_SIZE, prot);
+		if (retval)
+			goto cleanup_ret;
+	}
+	return 0;
+
+cleanup_ret:
+	/* Reserve error: Cleanup partial reservation and return error */
+	for (j = 0; j < i; j += PAGE_SIZE) {
+		paddr = base_paddr + j;
+		free_pfn_range(paddr, PAGE_SIZE);
+	}
+
+	return retval;
+}
+
+/*
+ * untrack_pfn_vma is called while unmapping a pfnmap for a region.
+ * untrack can be called for a specific region indicated by pfn and size or
+ * can be for the entire vma (in which case size can be zero).
+ */
+void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
+			unsigned long size)
+{
+	unsigned long i;
+	u64 paddr;
+	unsigned long vma_start = vma->vm_start;
+	unsigned long vma_end = vma->vm_end;
+	unsigned long vma_size = vma_end - vma_start;
+
+	if (!pat_enabled)
+		return;
+
+	if (is_linear_pfn_mapping(vma)) {
+		/* free the whole chunk starting from vm_pgoff */
+		paddr = (u64)vma->vm_pgoff << PAGE_SHIFT;
+		free_pfn_range(paddr, vma_size);
+		return;
+	}
+
+	if (size != 0 && size != vma_size) {
+		/* free page by page, using pfn and size */
+		paddr = (u64)pfn << PAGE_SHIFT;
+		for (i = 0; i < size; i += PAGE_SIZE) {
+			paddr = paddr + i;
+			free_pfn_range(paddr, PAGE_SIZE);
+		}
+	} else {
+		/* free entire vma, page by page, using the pfn from pte */
+		for (i = 0; i < vma_size; i += PAGE_SIZE) {
+			pte_t pte;
+
+			if (follow_pfnmap_pte(vma, vma_start + i, &pte))
+				continue;
+
+			paddr = pte_pa(pte);
+			free_pfn_range(paddr, PAGE_SIZE);
+		}
+	}
+}
+
 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
 
 /* get Nth element of the linked list */
-- 
cgit v1.2.3-70-g09d2


From 8a7b12f70fb135a1b1d865687de3edcdc780f6d1 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Thu, 18 Dec 2008 11:41:31 -0800
Subject: x86: PAT: change pgprot_noncached to uc_minus instead of strong uc -
 v3

Impact: mm behavior change.

Make pgprot_noncached uc_minus instead of strong UC. This will make
pgprot_noncached to be in line with ioremap_nocache() and all the other
APIs that map page uc_minus on uc request.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/pgtable.h    | 8 ++++++++
 arch/x86/include/asm/pgtable_32.h | 9 ---------
 arch/x86/include/asm/pgtable_64.h | 6 ------
 3 files changed, 8 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 7dcd94c29044..6968d4f6be3e 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -158,6 +158,14 @@
 #define PGD_IDENT_ATTR	 0x001		/* PRESENT (no other attributes) */
 #endif
 
+/*
+ * Macro to mark a page protection value as UC-
+ */
+#define pgprot_noncached(prot)					\
+	((boot_cpu_data.x86 > 3)				\
+	 ? (__pgprot(pgprot_val(prot) | _PAGE_CACHE_UC_MINUS))	\
+	 : (prot))
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index f9d5889b336b..72b020deb46b 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -100,15 +100,6 @@ extern unsigned long pg0[];
 # include <asm/pgtable-2level.h>
 #endif
 
-/*
- * Macro to mark a page protection value as "uncacheable".
- * On processors which do not support it, this is a no-op.
- */
-#define pgprot_noncached(prot)					\
-	((boot_cpu_data.x86 > 3)				\
-	 ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT))	\
-	 : (prot))
-
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 545a0e042bb2..4798a4033e3a 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -176,12 +176,6 @@ static inline int pmd_bad(pmd_t pmd)
 
 #define pages_to_mb(x)	((x) >> (20 - PAGE_SHIFT))   /* FIXME: is this right? */
 
-/*
- * Macro to mark a page protection value as "uncacheable".
- */
-#define pgprot_noncached(prot)					\
-	(__pgprot(pgprot_val((prot)) | _PAGE_PCD | _PAGE_PWT))
-
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
-- 
cgit v1.2.3-70-g09d2


From 2520bd3123c00272f818a176c92d03c7d0a113d6 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Thu, 18 Dec 2008 11:41:32 -0800
Subject: x86: PAT: add pgprot_writecombine() interface for drivers - v3

Impact: New mm functionality.

Add pgprot_writecombine. pgprot_writecombine will be aliased to
pgprot_noncached when not supported by the architecture.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/pgtable.h | 3 +++
 arch/x86/mm/pat.c              | 8 ++++++++
 include/asm-generic/pgtable.h  | 4 ++++
 3 files changed, 15 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 6968d4f6be3e..579f8ceee948 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -168,6 +168,9 @@
 
 #ifndef __ASSEMBLY__
 
+#define pgprot_writecombine	pgprot_writecombine
+extern pgprot_t pgprot_writecombine(pgprot_t prot);
+
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 1069ffecf77d..d5254bae84f4 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -832,6 +832,14 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
 	}
 }
 
+pgprot_t pgprot_writecombine(pgprot_t prot)
+{
+	if (pat_enabled)
+		return __pgprot(pgprot_val(prot) | _PAGE_CACHE_WC);
+	else
+		return pgprot_noncached(prot);
+}
+
 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
 
 /* get Nth element of the linked list */
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index ef87f889ef62..b84633801fb6 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
 #define move_pte(pte, prot, old_addr, new_addr)	(pte)
 #endif
 
+#ifndef pgprot_writecombine
+#define pgprot_writecombine pgprot_noncached
+#endif
+
 /*
  * When walking page tables, get the address of the next boundary,
  * or the end address of the range if that comes earlier.  Although no
-- 
cgit v1.2.3-70-g09d2


From d1769d5475176124af04fa69848b022c98c4bc37 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh <jaswinder@infradead.org>
Date: Fri, 19 Dec 2008 00:03:56 +0530
Subject: x86: traps.c declare functions before they get used

Impact: cleanup

 In asm/traps.h :-
 do_double_fault : added under X86_64
 sync_regs : added under X86_64
 math_error : moved out from X86_32 as it is common for both 32 and 64 bit
 math_emulate : moved from X86_32 as it is common for both 32 and 64 bit
 smp_thermal_interrupt : added under X86_64
 mce_threshold_interrupt : added under X86_64

Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/traps.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 45dee286e45c..2ee0a3bceedf 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -46,6 +46,10 @@ dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long);
 dotraplinkage void do_invalid_TSS(struct pt_regs *, long);
 dotraplinkage void do_segment_not_present(struct pt_regs *, long);
 dotraplinkage void do_stack_segment(struct pt_regs *, long);
+#ifdef CONFIG_X86_64
+dotraplinkage void do_double_fault(struct pt_regs *, long);
+asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *);
+#endif
 dotraplinkage void do_general_protection(struct pt_regs *, long);
 dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
 dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long);
@@ -72,10 +76,13 @@ static inline int get_si_code(unsigned long condition)
 extern int panic_on_unrecovered_nmi;
 extern int kstack_depth_to_print;
 
-#ifdef CONFIG_X86_32
 void math_error(void __user *);
-unsigned long patch_espfix_desc(unsigned long, unsigned long);
 asmlinkage void math_emulate(long);
+#ifdef CONFIG_X86_32
+unsigned long patch_espfix_desc(unsigned long, unsigned long);
+#else
+asmlinkage void smp_thermal_interrupt(void);
+asmlinkage void mce_threshold_interrupt(void);
 #endif
 
 #endif /* _ASM_X86_TRAPS_H */
-- 
cgit v1.2.3-70-g09d2


From b2fa739c06931d167b6d2aa7b514ab7f30d04dc0 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Thu, 18 Dec 2008 14:43:34 -0800
Subject: x86: sigframe.h: include headers for dependency

Impact: cleanup

Include following headers for dependency.
asm/sigcontext.h
asm/siginfo.h
asm/ucontext.h

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/sigframe.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h
index 3bd0f4276000..4e0fe26d27d3 100644
--- a/arch/x86/include/asm/sigframe.h
+++ b/arch/x86/include/asm/sigframe.h
@@ -1,6 +1,10 @@
 #ifndef _ASM_X86_SIGFRAME_H
 #define _ASM_X86_SIGFRAME_H
 
+#include <asm/sigcontext.h>
+#include <asm/siginfo.h>
+#include <asm/ucontext.h>
+
 #ifdef CONFIG_X86_32
 #define sigframe_ia32		sigframe
 #define rt_sigframe_ia32	rt_sigframe
-- 
cgit v1.2.3-70-g09d2


From 8869a2e5d3a66d5b63b948052d60cd13ede8b735 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Thu, 18 Dec 2008 14:46:52 -0800
Subject: x86: asm-offset_64: use rt_sigframe_ia32

Impact: cleanup

Use rt_sigframe_ia32 instead of rt_sigframe32.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/asm-offsets_64.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 7fcf63d22f8b..1d41d3f1edbc 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -20,6 +20,8 @@
 
 #include <xen/interface/xen.h>
 
+#include <asm/sigframe.h>
+
 #define __NO_STUBS 1
 #undef __SYSCALL
 #undef _ASM_X86_UNISTD_64_H
@@ -87,7 +89,7 @@ int main(void)
 	BLANK();
 #undef ENTRY
 	DEFINE(IA32_RT_SIGFRAME_sigcontext,
-	       offsetof (struct rt_sigframe32, uc.uc_mcontext));
+	       offsetof (struct rt_sigframe_ia32, uc.uc_mcontext));
 	BLANK();
 #endif
 	DEFINE(pbe_address, offsetof(struct pbe, address));
-- 
cgit v1.2.3-70-g09d2


From 9f221495997d180df51ce4d8296669445dd3e7b3 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Thu, 18 Dec 2008 14:47:37 -0800
Subject: x86: ia32.h: remove unused struct sigfram32 and rt_sigframe32

Impact: cleanup

Remove struct sigfram32 and rt_sigframe32 because there is no user.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/ia32.h | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 97989c0e534c..50ca486fd88c 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -129,24 +129,6 @@ typedef struct compat_siginfo {
 	} _sifields;
 } compat_siginfo_t;
 
-struct sigframe32 {
-	u32 pretcode;
-	int sig;
-	struct sigcontext_ia32 sc;
-	struct _fpstate_ia32 fpstate;
-	unsigned int extramask[_COMPAT_NSIG_WORDS-1];
-};
-
-struct rt_sigframe32 {
-	u32 pretcode;
-	int sig;
-	u32 pinfo;
-	u32 puc;
-	compat_siginfo_t info;
-	struct ucontext_ia32 uc;
-	struct _fpstate_ia32 fpstate;
-};
-
 struct ustat32 {
 	__u32			f_tfree;
 	compat_ino_t		f_tinode;
-- 
cgit v1.2.3-70-g09d2


From f34a10bd9f8cc95ebdc69a079db195636b2e22e0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 19 Dec 2008 01:36:14 +0100
Subject: x86: fix warning in arch/x86/kernel/microcode_amd.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

this warning:

  arch/x86/kernel/microcode_amd.c: In function ‘apply_microcode_amd’:
  arch/x86/kernel/microcode_amd.c:163: warning: cast from pointer to integer of different size
  arch/x86/kernel/microcode_amd.c:163: warning: cast from pointer to integer of different size

triggers because we want to pass the address to the microcode MSR,
which is 64-bit even on 32-bit. Cast it explicitly to express this.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 24c256f4e50a..c25fdb382292 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -160,7 +160,7 @@ static void apply_microcode_amd(int cpu)
 		return;
 
 	spin_lock_irqsave(&microcode_update_lock, flags);
-	wrmsrl(MSR_AMD64_PATCH_LOADER, &mc_amd->hdr.data_code);
+	wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
 	/* get patch id after patching */
 	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
 	spin_unlock_irqrestore(&microcode_update_lock, flags);
@@ -372,3 +372,4 @@ struct microcode_ops * __init init_amd_microcode(void)
 {
 	return &microcode_amd_ops;
 }
+
-- 
cgit v1.2.3-70-g09d2


From 345077cd98ff5532b2d1158013c3fec7b1ae85ec Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Thu, 18 Dec 2008 18:09:21 -0800
Subject: x86: fix intel x86_64 llc_shared_map/cpu_llc_id anomolies

Impact: fix wrong cache sharing detection on platforms supporting > 8 bit apicid's

In the presence of extended topology eumeration leaf 0xb provided
by cpuid, 32bit extended initial_apicid in cpuinfo_x86 struct will be
updated by detect_extended_topology(). At this instance, we should also
reinit the apicid (which could also potentially be extended to 32bit).

With out this there will potentially be duplicate apicid's populated in the
per cpu's cpuinfo_x86 struct, resulting in wrong cache sharing topology etc
detected by init_intel_cacheinfo().

Reported-by: Dimitri Sivanich <sivanich@sgi.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Dimitri Sivanich <sivanich@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: <stable@kernel.org>
---
 arch/x86/kernel/cpu/addon_cpuid_features.c | 8 ++++++++
 arch/x86/kernel/cpu/intel.c                | 8 +++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index ef8f831af823..2cf23634b6d9 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -120,9 +120,17 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
 	c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width)
 						 & core_select_mask;
 	c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width);
+	/*
+	 * Reinit the apicid, now that we have extended initial_apicid.
+	 */
+	c->apicid = phys_pkg_id(c->initial_apicid, 0);
 #else
 	c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask;
 	c->phys_proc_id = phys_pkg_id(core_plus_mask_width);
+	/*
+	 * Reinit the apicid, now that we have extended initial_apicid.
+	 */
+	c->apicid = phys_pkg_id(0);
 #endif
 	c->x86_max_cores = (core_level_siblings / smp_num_siblings);
 
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index caec59437a22..b21c37c060a2 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -252,6 +252,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 
 	intel_workarounds(c);
 
+	/*
+	 * Detect the extended topology information if available. This
+	 * will reinitialise the initial_apicid which will be used
+	 * in init_intel_cacheinfo()
+	 */
+	detect_extended_topology(c);
+
 	l2 = init_intel_cacheinfo(c);
 	if (c->cpuid_level > 9) {
 		unsigned eax = cpuid_eax(10);
@@ -323,7 +330,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 
 #endif
 
-	detect_extended_topology(c);
 	if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
 		/*
 		 * let's use the legacy cpuid vector 0x1 and 0x4 for topology
-- 
cgit v1.2.3-70-g09d2


From 34945ede31071ac7d72270cc6c1893323f392b3f Mon Sep 17 00:00:00 2001
From: Jaswinder Singh <jaswinder@infradead.org>
Date: Fri, 19 Dec 2008 22:33:52 +0530
Subject: x86: common.c boot_cpu_stack and boot_exception_stacks should be
 static

Impact: cleanup, avoid sparse warnings, reduce kernel size a bit

Fixes these sparse warnings:

 arch/x86/kernel/cpu/common.c:869:6: warning: symbol 'boot_cpu_stack' was not declared. Should it be static?
 arch/x86/kernel/cpu/common.c:910:6: warning: symbol 'boot_exception_stacks' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/common.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b9c9ea0217a9..aba49c782fd6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -862,7 +862,7 @@ EXPORT_SYMBOL(_cpu_pda);
 
 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
 
-char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
+static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
 
 void __cpuinit pda_init(int cpu)
 {
@@ -903,8 +903,8 @@ void __cpuinit pda_init(int cpu)
 	}
 }
 
-char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
-			   DEBUG_STKSZ] __page_aligned_bss;
+static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
+				  DEBUG_STKSZ] __page_aligned_bss;
 
 extern asmlinkage void ignore_sysret(void);
 
-- 
cgit v1.2.3-70-g09d2


From 8403295e0fa460f6240e2d781e25dc29189f33c7 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Fri, 19 Dec 2008 14:25:50 -0800
Subject: x86: ia32_signal: remove unnecessary declaration

Impact: cleanup

No need to declare do_signal().

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 3b3878a63bc2..09513f8a2896 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -43,7 +43,6 @@
 			 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
 			 X86_EFLAGS_CF)
 
-asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
 
 int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
-- 
cgit v1.2.3-70-g09d2


From 982d789ab76c8a11426852fec2fdf2f412e21c0c Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Fri, 19 Dec 2008 13:47:28 -0800
Subject: x86: PAT: remove follow_pfnmap_pte in favor of follow_phys

Impact: Cleanup - removes a new function in favor of a recently modified older one.

Replace follow_pfnmap_pte in pat code with follow_phys. follow_phys lso
returns protection eliminating the need of pte_pgprot call. Using follow_phys
also eliminates the need for pte_pa.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/pgtable.h |  5 -----
 arch/x86/mm/pat.c              | 30 +++++++++++------------------
 include/linux/mm.h             |  3 ---
 mm/memory.c                    | 43 ------------------------------------------
 4 files changed, 11 insertions(+), 70 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 579f8ceee948..2aa792bbd7e0 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -230,11 +230,6 @@ static inline unsigned long pte_pfn(pte_t pte)
 	return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
 }
 
-static inline u64 pte_pa(pte_t pte)
-{
-	return pte_val(pte) & PTE_PFN_MASK;
-}
-
 #define pte_page(pte)	pfn_to_page(pte_pfn(pte))
 
 static inline int pmd_large(pmd_t pte)
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index d5254bae84f4..541bcc944a5b 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -685,8 +685,7 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
 	int retval = 0;
 	unsigned long i, j;
 	u64 paddr;
-	pgprot_t prot;
-	pte_t pte;
+	unsigned long prot;
 	unsigned long vma_start = vma->vm_start;
 	unsigned long vma_end = vma->vm_end;
 	unsigned long vma_size = vma_end - vma_start;
@@ -696,26 +695,22 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
 
 	if (is_linear_pfn_mapping(vma)) {
 		/*
-		 * reserve the whole chunk starting from vm_pgoff,
-		 * But, we have to get the protection from pte.
+		 * reserve the whole chunk covered by vma. We need the
+		 * starting address and protection from pte.
 		 */
-		if (follow_pfnmap_pte(vma, vma_start, &pte)) {
+		if (follow_phys(vma, vma_start, 0, &prot, &paddr)) {
 			WARN_ON_ONCE(1);
-			return -1;
+			return -EINVAL;
 		}
-		prot = pte_pgprot(pte);
-		paddr = (u64)vma->vm_pgoff << PAGE_SHIFT;
-		return reserve_pfn_range(paddr, vma_size, prot);
+		return reserve_pfn_range(paddr, vma_size, __pgprot(prot));
 	}
 
 	/* reserve entire vma page by page, using pfn and prot from pte */
 	for (i = 0; i < vma_size; i += PAGE_SIZE) {
-		if (follow_pfnmap_pte(vma, vma_start + i, &pte))
+		if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
 			continue;
 
-		paddr = pte_pa(pte);
-		prot = pte_pgprot(pte);
-		retval = reserve_pfn_range(paddr, PAGE_SIZE, prot);
+		retval = reserve_pfn_range(paddr, PAGE_SIZE, __pgprot(prot));
 		if (retval)
 			goto cleanup_ret;
 	}
@@ -724,10 +719,9 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
 cleanup_ret:
 	/* Reserve error: Cleanup partial reservation and return error */
 	for (j = 0; j < i; j += PAGE_SIZE) {
-		if (follow_pfnmap_pte(vma, vma_start + j, &pte))
+		if (follow_phys(vma, vma_start + j, 0, &prot, &paddr))
 			continue;
 
-		paddr = pte_pa(pte);
 		free_pfn_range(paddr, PAGE_SIZE);
 	}
 
@@ -797,6 +791,7 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
 {
 	unsigned long i;
 	u64 paddr;
+	unsigned long prot;
 	unsigned long vma_start = vma->vm_start;
 	unsigned long vma_end = vma->vm_end;
 	unsigned long vma_size = vma_end - vma_start;
@@ -821,12 +816,9 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
 	} else {
 		/* free entire vma, page by page, using the pfn from pte */
 		for (i = 0; i < vma_size; i += PAGE_SIZE) {
-			pte_t pte;
-
-			if (follow_pfnmap_pte(vma, vma_start + i, &pte))
+			if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
 				continue;
 
-			paddr = pte_pa(pte);
 			free_pfn_range(paddr, PAGE_SIZE);
 		}
 	}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2f6e2f886d4b..36f9b3fa5e15 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1239,9 +1239,6 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_GET	0x04	/* do get_page on page */
 #define FOLL_ANON	0x08	/* give ZERO_PAGE if no pgtable */
 
-int follow_pfnmap_pte(struct vm_area_struct *vma,
-				unsigned long address, pte_t *ret_ptep);
-
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
 extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
diff --git a/mm/memory.c b/mm/memory.c
index 79f28e35d4fc..6b29f39a5a3e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1168,49 +1168,6 @@ no_page_table:
 	return page;
 }
 
-int follow_pfnmap_pte(struct vm_area_struct *vma, unsigned long address,
-			pte_t *ret_ptep)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *ptep, pte;
-	spinlock_t *ptl;
-	struct page *page;
-	struct mm_struct *mm = vma->vm_mm;
-
-	if (!is_pfn_mapping(vma))
-		goto err;
-
-	page = NULL;
-	pgd = pgd_offset(mm, address);
-	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
-		goto err;
-
-	pud = pud_offset(pgd, address);
-	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
-		goto err;
-
-	pmd = pmd_offset(pud, address);
-	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
-		goto err;
-
-	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
-
-	pte = *ptep;
-	if (!pte_present(pte))
-		goto err_unlock;
-
-	*ret_ptep = pte;
-	pte_unmap_unlock(ptep, ptl);
-	return 0;
-
-err_unlock:
-	pte_unmap_unlock(ptep, ptl);
-err:
-	return -EINVAL;
-}
-
 /* Can we do the FOLL_ANON optimization? */
 static inline int use_zero_page(struct vm_area_struct *vma)
 {
-- 
cgit v1.2.3-70-g09d2


From 34801ba9bf0381fcf0e2b08179d2c07f2c6ede74 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Fri, 19 Dec 2008 13:47:29 -0800
Subject: x86: PAT: move track untrack pfnmap stubs to asm-generic

Impact: Cleanup and branch hints only.

Move the track and untrack pfn stub routines from memory.c to asm-generic.
Also add unlikely to pfnmap related calls in fork and exit path.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/pgtable.h |  6 ++----
 include/asm-generic/pgtable.h  | 46 ++++++++++++++++++++++++++++++++++++++++
 include/linux/mm.h             |  6 ------
 mm/memory.c                    | 48 ++----------------------------------------
 4 files changed, 50 insertions(+), 56 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 2aa792bbd7e0..875192bf72cb 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -339,12 +339,10 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 
 #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
 
+#ifndef __ASSEMBLY__
 /* Indicate that x86 has its own track and untrack pfn vma functions */
-#define track_pfn_vma_new track_pfn_vma_new
-#define track_pfn_vma_copy track_pfn_vma_copy
-#define untrack_pfn_vma untrack_pfn_vma
+#define __HAVE_PFNMAP_TRACKING
 
-#ifndef __ASSEMBLY__
 #define __HAVE_PHYS_MEM_ACCESS_PROT
 struct file;
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index b84633801fb6..72ebe91005a8 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -293,6 +293,52 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
 #define arch_flush_lazy_cpu_mode()	do {} while (0)
 #endif
 
+#ifndef __HAVE_PFNMAP_TRACKING
+/*
+ * Interface that can be used by architecture code to keep track of
+ * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
+ *
+ * track_pfn_vma_new is called when a _new_ pfn mapping is being established
+ * for physical range indicated by pfn and size.
+ */
+static inline int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
+					unsigned long pfn, unsigned long size)
+{
+	return 0;
+}
+
+/*
+ * Interface that can be used by architecture code to keep track of
+ * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
+ *
+ * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
+ * copied through copy_page_range().
+ */
+static inline int track_pfn_vma_copy(struct vm_area_struct *vma)
+{
+	return 0;
+}
+
+/*
+ * Interface that can be used by architecture code to keep track of
+ * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
+ *
+ * untrack_pfn_vma is called while unmapping a pfnmap for a region.
+ * untrack can be called for a specific region indicated by pfn and size or
+ * can be for the entire vma (in which case size can be zero).
+ */
+static inline void untrack_pfn_vma(struct vm_area_struct *vma,
+					unsigned long pfn, unsigned long size)
+{
+}
+#else
+extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
+				unsigned long pfn, unsigned long size);
+extern int track_pfn_vma_copy(struct vm_area_struct *vma);
+extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
+				unsigned long size);
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_GENERIC_PGTABLE_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 36f9b3fa5e15..d3ddd735e375 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -163,12 +163,6 @@ static inline int is_pfn_mapping(struct vm_area_struct *vma)
 	return (vma->vm_flags & VM_PFNMAP);
 }
 
-extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
-				unsigned long pfn, unsigned long size);
-extern int track_pfn_vma_copy(struct vm_area_struct *vma);
-extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
-				unsigned long size);
-
 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's
  * ->fault function. The vma's ->fault is responsible for returning a bitmask
diff --git a/mm/memory.c b/mm/memory.c
index 6b29f39a5a3e..f01b7eed6e16 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -99,50 +99,6 @@ int randomize_va_space __read_mostly =
 					2;
 #endif
 
-#ifndef track_pfn_vma_new
-/*
- * Interface that can be used by architecture code to keep track of
- * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
- *
- * track_pfn_vma_new is called when a _new_ pfn mapping is being established
- * for physical range indicated by pfn and size.
- */
-int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
-			unsigned long pfn, unsigned long size)
-{
-	return 0;
-}
-#endif
-
-#ifndef track_pfn_vma_copy
-/*
- * Interface that can be used by architecture code to keep track of
- * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
- *
- * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
- * copied through copy_page_range().
- */
-int track_pfn_vma_copy(struct vm_area_struct *vma)
-{
-	return 0;
-}
-#endif
-
-#ifndef untrack_pfn_vma
-/*
- * Interface that can be used by architecture code to keep track of
- * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
- *
- * untrack_pfn_vma is called while unmapping a pfnmap for a region.
- * untrack can be called for a specific region indicated by pfn and size or
- * can be for the entire vma (in which case size can be zero).
- */
-void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
-			unsigned long size)
-{
-}
-#endif
-
 static int __init disable_randmaps(char *s)
 {
 	randomize_va_space = 0;
@@ -713,7 +669,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (is_vm_hugetlb_page(vma))
 		return copy_hugetlb_page_range(dst_mm, src_mm, vma);
 
-	if (is_pfn_mapping(vma)) {
+	if (unlikely(is_pfn_mapping(vma))) {
 		/*
 		 * We do not free on error cases below as remove_vma
 		 * gets called on error from higher level routine
@@ -969,7 +925,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 		if (vma->vm_flags & VM_ACCOUNT)
 			*nr_accounted += (end - start) >> PAGE_SHIFT;
 
-		if (is_pfn_mapping(vma))
+		if (unlikely(is_pfn_mapping(vma)))
 			untrack_pfn_vma(vma, 0, 0);
 
 		while (start != end) {
-- 
cgit v1.2.3-70-g09d2


From adf77bac052bb5bf0722b2ce2af9fefc5b2d2a71 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 22 Dec 2008 17:56:05 -0800
Subject: x86: prioritize the FPU traps for the error code

In the case of multiple FPU errors, prioritize the error codes,
instead of returning __SI_FAULT, which ends up pushing a 0 as the
error code to userspace, a POSIX violation.

For i386, we will simply return if there are no errors at all; for
x86-64 this is probably a "can't happen" (and the code should be
unified), but for this patch, return __SI_FAULT|SI_KERNEL if this ever
happens.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/traps.c | 34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 04d242ab0161..c320c29255c2 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -664,7 +664,7 @@ void math_error(void __user *ip)
 {
 	struct task_struct *task;
 	siginfo_t info;
-	unsigned short cwd, swd;
+	unsigned short cwd, swd, err;
 
 	/*
 	 * Save the info for the exception handler and clear the error.
@@ -675,7 +675,6 @@ void math_error(void __user *ip)
 	task->thread.error_code = 0;
 	info.si_signo = SIGFPE;
 	info.si_errno = 0;
-	info.si_code = __SI_FAULT;
 	info.si_addr = ip;
 	/*
 	 * (~cwd & swd) will mask out exceptions that are not set to unmasked
@@ -689,34 +688,31 @@ void math_error(void __user *ip)
 	 */
 	cwd = get_fpu_cwd(task);
 	swd = get_fpu_swd(task);
-	switch (swd & ~cwd & 0x3f) {
-	case 0x000: /* No unmasked exception */
-#ifdef CONFIG_X86_32
+
+	err = swd & ~cwd & 0x3f;
+
+#if CONFIG_X86_32
+	if (!err)
 		return;
 #endif
-	default: /* Multiple exceptions */
-		break;
-	case 0x001: /* Invalid Op */
+
+	if (err & 0x001) {	/* Invalid op */
 		/*
 		 * swd & 0x240 == 0x040: Stack Underflow
 		 * swd & 0x240 == 0x240: Stack Overflow
 		 * User must clear the SF bit (0x40) if set
 		 */
 		info.si_code = FPE_FLTINV;
-		break;
-	case 0x002: /* Denormalize */
-	case 0x010: /* Underflow */
-		info.si_code = FPE_FLTUND;
-		break;
-	case 0x004: /* Zero Divide */
+	} else if (err & 0x004) { /* Divide by Zero */
 		info.si_code = FPE_FLTDIV;
-		break;
-	case 0x008: /* Overflow */
+	} else if (err & 0x008) { /* Overflow */
 		info.si_code = FPE_FLTOVF;
-		break;
-	case 0x020: /* Precision */
+	} else if (err & 0x012) { /* Denormal, Underflow */
+		info.si_code = FPE_FLTUND;
+	} else if (err & 0x020) { /* Precision */
 		info.si_code = FPE_FLTRES;
-		break;
+	} else {
+		info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */
 	}
 	force_sig_info(SIGFPE, &info, task);
 }
-- 
cgit v1.2.3-70-g09d2


From c1c15b65ec30275575dac9322aae607075769fbc Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Tue, 23 Dec 2008 10:10:40 -0800
Subject: x86: PAT: fix address types in track_pfn_vma_new()

Impact: cleanup, fix warning

This warning:

 arch/x86/mm/pat.c: In function track_pfn_vma_copy:
 arch/x86/mm/pat.c:701: warning: passing argument 5 of follow_phys from incompatible pointer type

Triggers because physical addresses are resource_size_t, not u64.

This really matters when calling an interface like follow_phys() which
takes a pointer to a physical address -- although on x86, being
littleendian, it would generally work anyway as long as the memory region
wasn't completely uninitialized.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pat.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 541bcc944a5b..85cbd3cd3723 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -684,7 +684,7 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
 {
 	int retval = 0;
 	unsigned long i, j;
-	u64 paddr;
+	resource_size_t paddr;
 	unsigned long prot;
 	unsigned long vma_start = vma->vm_start;
 	unsigned long vma_end = vma->vm_end;
@@ -746,8 +746,8 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
 {
 	int retval = 0;
 	unsigned long i, j;
-	u64 base_paddr;
-	u64 paddr;
+	resource_size_t base_paddr;
+	resource_size_t paddr;
 	unsigned long vma_start = vma->vm_start;
 	unsigned long vma_end = vma->vm_end;
 	unsigned long vma_size = vma_end - vma_start;
@@ -757,12 +757,12 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
 
 	if (is_linear_pfn_mapping(vma)) {
 		/* reserve the whole chunk starting from vm_pgoff */
-		paddr = (u64)vma->vm_pgoff << PAGE_SHIFT;
+		paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
 		return reserve_pfn_range(paddr, vma_size, prot);
 	}
 
 	/* reserve page by page using pfn and size */
-	base_paddr = (u64)pfn << PAGE_SHIFT;
+	base_paddr = (resource_size_t)pfn << PAGE_SHIFT;
 	for (i = 0; i < size; i += PAGE_SIZE) {
 		paddr = base_paddr + i;
 		retval = reserve_pfn_range(paddr, PAGE_SIZE, prot);
@@ -790,7 +790,7 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
 			unsigned long size)
 {
 	unsigned long i;
-	u64 paddr;
+	resource_size_t paddr;
 	unsigned long prot;
 	unsigned long vma_start = vma->vm_start;
 	unsigned long vma_end = vma->vm_end;
@@ -801,14 +801,14 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
 
 	if (is_linear_pfn_mapping(vma)) {
 		/* free the whole chunk starting from vm_pgoff */
-		paddr = (u64)vma->vm_pgoff << PAGE_SHIFT;
+		paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
 		free_pfn_range(paddr, vma_size);
 		return;
 	}
 
 	if (size != 0 && size != vma_size) {
 		/* free page by page, using pfn and size */
-		paddr = (u64)pfn << PAGE_SHIFT;
+		paddr = (resource_size_t)pfn << PAGE_SHIFT;
 		for (i = 0; i < size; i += PAGE_SIZE) {
 			paddr = paddr + i;
 			free_pfn_range(paddr, PAGE_SIZE);
-- 
cgit v1.2.3-70-g09d2


From 1fcccb008be12ea823aaa392758e1e41fb82de9a Mon Sep 17 00:00:00 2001
From: Jaswinder Singh <jaswinder@infradead.org>
Date: Tue, 23 Dec 2008 21:50:11 +0530
Subject: x86: traps.c replace #if CONFIG_X86_32 with #ifdef CONFIG_X86_32

Impact: cleanup, avoid warning on X86_64

Fixes this warning on X86_64:

  CC      arch/x86/kernel/traps.o
  arch/x86/kernel/traps.c:695:5: warning: "CONFIG_X86_32" is not defined

Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/traps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c320c29255c2..f37cee75ab58 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -691,7 +691,7 @@ void math_error(void __user *ip)
 
 	err = swd & ~cwd & 0x3f;
 
-#if CONFIG_X86_32
+#ifdef CONFIG_X86_32
 	if (!err)
 		return;
 #endif
-- 
cgit v1.2.3-70-g09d2