From 39d0c30d00d7ca44f62e0fb11eab053b17224a8f Mon Sep 17 00:00:00 2001
From: Sachin Kamat <sachin.kamat@linaro.org>
Date: Wed, 6 Mar 2013 16:53:43 +0530
Subject: ARC: Remove unneeded version.h header include

version.h header file inclusion is not necessary as detected by
versioncheck script.

Signed-off-by: Sachin Kamat <sachin.kamat@linaro.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/fault.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/arc/mm')

diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index af55aab803d2..689ffd86d5e9 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -12,7 +12,6 @@
 #include <linux/sched.h>
 #include <linux/errno.h>
 #include <linux/ptrace.h>
-#include <linux/version.h>
 #include <linux/uaccess.h>
 #include <linux/kdebug.h>
 #include <asm/pgalloc.h>
-- 
cgit v1.2.3-70-g09d2


From 1ec9db1056b0c4b8b9dfca4736634c7c8e0833d5 Mon Sep 17 00:00:00 2001
From: Sachin Kamat <sachin.kamat@linaro.org>
Date: Wed, 6 Mar 2013 16:53:44 +0530
Subject: ARC: Use <linux/*> headers instead of <asm/*>

Silences the following checkpatch warnings:
WARNING: Use #include <linux/ptrace.h> instead of <asm/ptrace.h>
WARNING: Use #include <linux/kprobes.h> instead of <asm/kprobes.h>
WARNING: Use #include <linux/kgdb.h> instead of <asm/kgdb.h>
WARNING: Use #include <linux/uaccess.h> instead of <asm/uaccess.h>
WARNING: Use #include <linux/cache.h> instead of <asm/cache.h>

Signed-off-by: Sachin Kamat <sachin.kamat@linaro.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/asm-offsets.c | 2 +-
 arch/arc/kernel/disasm.c      | 2 +-
 arch/arc/kernel/setup.c       | 2 +-
 arch/arc/kernel/traps.c       | 6 +++---
 arch/arc/mm/ioremap.c         | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch/arc/mm')

diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
index 0dc148ebce74..7dcda7025241 100644
--- a/arch/arc/kernel/asm-offsets.c
+++ b/arch/arc/kernel/asm-offsets.c
@@ -11,9 +11,9 @@
 #include <linux/interrupt.h>
 #include <linux/thread_info.h>
 #include <linux/kbuild.h>
+#include <linux/ptrace.h>
 #include <asm/hardirq.h>
 #include <asm/page.h>
-#include <asm/ptrace.h>
 
 int main(void)
 {
diff --git a/arch/arc/kernel/disasm.c b/arch/arc/kernel/disasm.c
index 2f390289a792..df957a4dbd60 100644
--- a/arch/arc/kernel/disasm.c
+++ b/arch/arc/kernel/disasm.c
@@ -12,8 +12,8 @@
 #include <linux/types.h>
 #include <linux/kprobes.h>
 #include <linux/slab.h>
+#include <linux/uaccess.h>
 #include <asm/disasm.h>
-#include <asm/uaccess.h>
 
 #if defined(CONFIG_KGDB) || defined(CONFIG_ARC_MISALIGN_ACCESS) || \
 	defined(CONFIG_KPROBES)
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 2d95ac07df7b..25262045ac5a 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -14,10 +14,10 @@
 #include <linux/module.h>
 #include <linux/cpu.h>
 #include <linux/of_fdt.h>
+#include <linux/cache.h>
 #include <asm/sections.h>
 #include <asm/arcregs.h>
 #include <asm/tlb.h>
-#include <asm/cache.h>
 #include <asm/setup.h>
 #include <asm/page.h>
 #include <asm/irq.h>
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index 7496995371e8..1d902f4e357e 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -16,11 +16,11 @@
 #include <linux/sched.h>
 #include <linux/kdebug.h>
 #include <linux/uaccess.h>
-#include <asm/ptrace.h>
+#include <linux/ptrace.h>
+#include <linux/kprobes.h>
+#include <linux/kgdb.h>
 #include <asm/setup.h>
-#include <asm/kprobes.h>
 #include <asm/unaligned.h>
-#include <asm/kgdb.h>
 
 void __init trap_init(void)
 {
diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c
index 3e5c92c79936..739e65f355de 100644
--- a/arch/arc/mm/ioremap.c
+++ b/arch/arc/mm/ioremap.c
@@ -12,7 +12,7 @@
 #include <linux/io.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
-#include <asm/cache.h>
+#include <linux/cache.h>
 
 void __iomem *ioremap(unsigned long paddr, unsigned long size)
 {
-- 
cgit v1.2.3-70-g09d2


From 955ad5959f913bd340ecb8d5e78d03ef7d1b0392 Mon Sep 17 00:00:00 2001
From: Sachin Kamat <sachin.kamat@linaro.org>
Date: Wed, 6 Mar 2013 16:53:45 +0530
Subject: ARC: Fix coding style issues

Fixes the following coding style issues as detected by checkpatch:
ERROR: space required before the open parenthesis '('
ERROR: "foo * bar" should be "foo *bar"
WARNING: space prohibited between function name and open parenthesis '('
WARNING: please, no spaces at the start of a line

Signed-off-by: Sachin Kamat <sachin.kamat@linaro.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/irq.c    | 2 +-
 arch/arc/kernel/module.c | 4 ++--
 arch/arc/mm/extable.c    | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/arc/mm')

diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
index 551c10dff481..d32c050dd7b9 100644
--- a/arch/arc/kernel/irq.c
+++ b/arch/arc/kernel/irq.c
@@ -102,7 +102,7 @@ void __init init_onchip_IRQ(void)
 	struct device_node *intc = NULL;
 
 	intc = of_find_compatible_node(NULL, NULL, "snps,arc700-intc");
-	if(!intc)
+	if (!intc)
 		panic("DeviceTree Missing incore intc\n");
 
 	root_domain = irq_domain_add_legacy(intc, NR_IRQS, 0, 0,
diff --git a/arch/arc/kernel/module.c b/arch/arc/kernel/module.c
index cdd359352c0a..376e04622962 100644
--- a/arch/arc/kernel/module.c
+++ b/arch/arc/kernel/module.c
@@ -47,7 +47,7 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 		}
 	}
 #endif
-    return 0;
+	return 0;
 }
 
 void module_arch_cleanup(struct module *mod)
@@ -141,5 +141,5 @@ int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs,
 		mod->arch.unw_info = unw;
 	}
 #endif
-    return 0;
+	return 0;
 }
diff --git a/arch/arc/mm/extable.c b/arch/arc/mm/extable.c
index 014172ba8432..aa652e281324 100644
--- a/arch/arc/mm/extable.c
+++ b/arch/arc/mm/extable.c
@@ -27,7 +27,7 @@ int fixup_exception(struct pt_regs *regs)
 
 #ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
 
-long arc_copy_from_user_noinline(void *to, const void __user * from,
+long arc_copy_from_user_noinline(void *to, const void __user *from,
 		unsigned long n)
 {
 	return __arc_copy_from_user(to, from, n);
@@ -48,7 +48,7 @@ unsigned long arc_clear_user_noinline(void __user *to,
 }
 EXPORT_SYMBOL(arc_clear_user_noinline);
 
-long arc_strncpy_from_user_noinline (char *dst, const char __user *src,
+long arc_strncpy_from_user_noinline(char *dst, const char __user *src,
 		long count)
 {
 	return __arc_strncpy_from_user(dst, src, count);
-- 
cgit v1.2.3-70-g09d2


From 610c6502e0d51415dd1e4d31bc97e13ef9d25334 Mon Sep 17 00:00:00 2001
From: Paul Bolle <pebolle@tiscali.nl>
Date: Fri, 15 Mar 2013 17:16:17 +0100
Subject: ARC: remove #ifdef-ed out include of dead header

There's no (Kconfig) macro CONFIG_BLOCK_DEV_RAM. (CONFIG_BLK_DEV_RAM
does exist though.) But linux/blk.h got killed in 2005 anyway (in a
patch titled "kill blk.h"), so these three lines can be removed.

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/init.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch/arc/mm')

diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index caf797de23fc..6634cf50e3b4 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -10,9 +10,6 @@
 #include <linux/mm.h>
 #include <linux/bootmem.h>
 #include <linux/memblock.h>
-#ifdef CONFIG_BLOCK_DEV_RAM
-#include <linux/blk.h>
-#endif
 #include <linux/swap.h>
 #include <linux/module.h>
 #include <asm/page.h>
-- 
cgit v1.2.3-70-g09d2


From 30ecee8cdd05415e5602bd755d9210e1c5a5b64d Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Tue, 9 Apr 2013 17:18:12 +0530
Subject: ARC: [build] Fix warnings with CONFIG_DEBUG_SECTION_MISMATCH

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/irq.c      |  2 +-
 arch/arc/kernel/setup.c    | 10 +++++-----
 arch/arc/mm/cache_arc700.c |  4 ++--
 arch/arc/mm/tlb.c          |  4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'arch/arc/mm')

diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
index d32c050dd7b9..84ce5317d9fc 100644
--- a/arch/arc/kernel/irq.c
+++ b/arch/arc/kernel/irq.c
@@ -26,7 +26,7 @@
  * -Disable all IRQs (on CPU side)
  * -Optionally, setup the High priority Interrupts as Level 2 IRQs
  */
-void __init arc_init_IRQ(void)
+void __cpuinit arc_init_IRQ(void)
 {
 	int level_mask = 0;
 
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 694bbd467498..197514649034 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -31,14 +31,14 @@
 int running_on_hw = 1;	/* vs. on ISS */
 
 char __initdata command_line[COMMAND_LINE_SIZE];
-struct machine_desc *machine_desc __initdata;
+struct machine_desc *machine_desc __cpuinitdata;
 
 struct task_struct *_current_task[NR_CPUS];	/* For stack switching */
 
 struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
 
 
-void __init read_arc_build_cfg_regs(void)
+void __cpuinit read_arc_build_cfg_regs(void)
 {
 	struct bcr_perip uncached_space;
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
@@ -237,7 +237,7 @@ char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
 	return buf;
 }
 
-void __init arc_chk_ccms(void)
+void __cpuinit arc_chk_ccms(void)
 {
 #if defined(CONFIG_ARC_HAS_DCCM) || defined(CONFIG_ARC_HAS_ICCM)
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
@@ -272,7 +272,7 @@ void __init arc_chk_ccms(void)
  * hardware has dedicated regs which need to be saved/restored on ctx-sw
  * (Single Precision uses core regs), thus kernel is kind of oblivious to it
  */
-void __init arc_chk_fpu(void)
+void __cpuinit arc_chk_fpu(void)
 {
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
 
@@ -293,7 +293,7 @@ void __init arc_chk_fpu(void)
  *    such as only for boot CPU etc
  */
 
-void __init setup_processor(void)
+void __cpuinit setup_processor(void)
 {
 	char str[512];
 	int cpu_id = smp_processor_id();
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index 88d617d84234..c02aac649e84 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -109,7 +109,7 @@ char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len)
  * the cpuinfo structure for later use.
  * No Validation done here, simply read/convert the BCRs
  */
-void __init read_decode_cache_bcr(void)
+void __cpuinit read_decode_cache_bcr(void)
 {
 	struct bcr_cache ibcr, dbcr;
 	struct cpuinfo_arc_cache *p_ic, *p_dc;
@@ -141,7 +141,7 @@ void __init read_decode_cache_bcr(void)
  * 3. Enable the Caches, setup default flush mode for D-Cache
  * 3. Calculate the SHMLBA used by user space
  */
-void __init arc_cache_init(void)
+void __cpuinit arc_cache_init(void)
 {
 	unsigned int temp;
 	unsigned int cpu = smp_processor_id();
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 9b9ce23f4ec3..f71a26d3f68e 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -434,7 +434,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddress,
  * the cpuinfo structure for later use.
  * No Validation is done here, simply read/convert the BCRs
  */
-void __init read_decode_mmu_bcr(void)
+void __cpuinit read_decode_mmu_bcr(void)
 {
 	unsigned int tmp;
 	struct bcr_mmu_1_2 *mmu2;	/* encoded MMU2 attr */
@@ -480,7 +480,7 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
 	return buf;
 }
 
-void __init arc_mmu_init(void)
+void __cpuinit arc_mmu_init(void)
 {
 	char str[256];
 	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
-- 
cgit v1.2.3-70-g09d2


From e3edeb67fbd6c522a46a844c569fc41a8a2b6876 Mon Sep 17 00:00:00 2001
From: Noam Camus <noamc@ezchip.com>
Date: Tue, 26 Feb 2013 09:22:46 +0200
Subject: ARC: Respect the cpu_id passed for fetching correct cpu info

Signed-off-by: Noam Camus <noamc@ezchip.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/tlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arc/mm')

diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index f71a26d3f68e..c03364af9363 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -466,7 +466,7 @@ void __cpuinit read_decode_mmu_bcr(void)
 char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
 {
 	int n = 0;
-	struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+	struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu;
 
 	n += scnprintf(buf + n, len - n, "ARC700 MMU [v%x]\t: %dk PAGE, ",
 		       p_mmu->ver, TO_KB(p_mmu->pg_sz));
-- 
cgit v1.2.3-70-g09d2


From 24603fdd19d978fcc0d089d92370ee1aa3a71e84 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 11 Apr 2013 18:36:35 +0530
Subject: ARC: [mm] optimise icache flush for user mappings

ARC icache doesn't snoop dcache thus executable pages need to be made
coherent before mapping into userspace in flush_icache_page().

However ARC700 CDU (hardware cache flush module) requires both vaddr
(index in cache) as well as paddr (tag match) to correctly identify a
line in the VIPT cache. A typical ARC700 SoC has aliasing icache, thus
the paddr only based flush_icache_page() API couldn't be implemented
efficiently. It had to loop through all possible alias indexes and perform
the invalidate operation (of course the cache op would only succeed at
the index(es) where the tag matches - typically only 1, but the cost of
visiting all the cache-bins needs to be paid nevertheless).

Turns out however that the vaddr (along with paddr) is available in
update_mmu_cache() hence better suits ARC icache flush semantics.
With both vaddr+paddr, exactly one flush operation per line is done.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/cacheflush.h | 10 +++++++++-
 arch/arc/mm/cache_arc700.c        | 14 +++-----------
 arch/arc/mm/tlb.c                 | 12 +++++++++---
 3 files changed, 21 insertions(+), 15 deletions(-)

(limited to 'arch/arc/mm')

diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
index 97ee96f26505..46f13e7314dc 100644
--- a/arch/arc/include/asm/cacheflush.h
+++ b/arch/arc/include/asm/cacheflush.h
@@ -20,12 +20,20 @@
 
 #include <linux/mm.h>
 
+/*
+ * Semantically we need this because icache doesn't snoop dcache/dma.
+ * However ARC Cache flush requires paddr as well as vaddr, latter not available
+ * in the flush_icache_page() API. So we no-op it but do the equivalent work
+ * in update_mmu_cache()
+ */
+#define flush_icache_page(vma, page)
+
 void flush_cache_all(void);
 
 void flush_icache_range(unsigned long start, unsigned long end);
-void flush_icache_page(struct vm_area_struct *vma, struct page *page);
 void flush_icache_range_vaddr(unsigned long paddr, unsigned long u_vaddr,
 				     int len);
+void __inv_icache_page(unsigned long paddr, unsigned long vaddr);
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index c02aac649e84..a65c13942766 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -716,18 +716,10 @@ void flush_icache_range_vaddr(unsigned long paddr, unsigned long u_vaddr,
 	__dc_line_op(paddr, len, OP_FLUSH);
 }
 
-/*
- * XXX: This also needs to be optim using pg_arch_1
- * This is called when a page-cache page is about to be mapped into a
- * user process' address space.  It offers an opportunity for a
- * port to ensure d-cache/i-cache coherency if necessary.
- */
-void flush_icache_page(struct vm_area_struct *vma, struct page *page)
+/* wrapper to compile time eliminate alignment checks in flush loop */
+void __inv_icache_page(unsigned long paddr, unsigned long vaddr)
 {
-	if (!(vma->vm_flags & VM_EXEC))
-		return;
-
-	__ic_line_inv((unsigned long)page_address(page), PAGE_SIZE);
+	__ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
 }
 
 void flush_icache_all(void)
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index c03364af9363..086be526072a 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -422,12 +422,18 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
  * when a new PTE is entered in Page Tables or an existing one
  * is modified. We aggresively pre-install a TLB entry
  */
-
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddress,
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
 		      pte_t *ptep)
 {
+	unsigned long vaddr = vaddr_unaligned & PAGE_MASK;
+
+	create_tlb(vma, vaddr, ptep);
 
-	create_tlb(vma, vaddress, ptep);
+	/* icache doesn't snoop dcache, thus needs to be made coherent here */
+	if (vma->vm_flags & VM_EXEC) {
+		unsigned long paddr =  pte_val(*ptep) & PAGE_MASK;
+		__inv_icache_page(paddr, vaddr);
+	}
 }
 
 /* Read the Cache Build Confuration Registers, Decode them and save into
-- 
cgit v1.2.3-70-g09d2


From 7586bf7286097cd47299c44192c30e01f0d55391 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 12 Apr 2013 12:18:25 +0530
Subject: ARC: [mm] optimise icache flush for kernel mappings

This change continues the theme from prev commit - this time icache
handling for kernel's own code modification (vmalloc: loadable modules,
breakpoints for kprobes/kgdb...)

flush_icache_range() calls the CDU icache helper with vaddr to enable
exact line invalidate.

For a true kernel-virtual mapping, the vaddr is actually virtual, hence
valid as an index into the cache. For a kprobes breakpoint however, the
vaddr arg is actually a paddr - since that's how the normal kernel is mapped
in the ARC memory map.  This implies that the CDU will use the same addr for
indexing as for tag match - which is fine since kernel code would only
have that "implicit" mapping and none other.

This should speed up module loading significantly - especially on default
ARC700 icache configurations (32k) which alias.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/cache_arc700.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'arch/arc/mm')

diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index a65c13942766..5651e7bd3b7e 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -642,8 +642,8 @@ void dma_cache_wback(unsigned long start, unsigned long sz)
 EXPORT_SYMBOL(dma_cache_wback);
 
 /*
- * This is API for making I/D Caches consistent when modifying code
- * (loadable modules, kprobes,  etc)
+ * This is API for making I/D Caches consistent when modifying
+ * kernel code (loadable modules, kprobes, kgdb...)
  * This is called on insmod, with kernel virtual address for CODE of
  * the module. ARC cache maintenance ops require PHY address thus we
  * need to convert vmalloc addr to PHY addr
@@ -673,7 +673,13 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
 
 	/* Case: Kernel Phy addr (0x8000_0000 onwards) */
 	if (likely(kstart > PAGE_OFFSET)) {
-		__ic_line_inv(kstart, kend - kstart);
+		/*
+		 * The 2nd arg despite being paddr will be used to index icache
+		 * This is OK since no alternate virtual mappings will exist
+		 * given the callers for this case: kprobe/kgdb in built-in
+		 * kernel code only.
+		 */
+		__ic_line_inv_vaddr(kstart, kstart, kend - kstart);
 		__dc_line_op(kstart, kend - kstart, OP_FLUSH);
 		return;
 	}
@@ -694,7 +700,7 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
 		sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);
 		local_irq_save(flags);
 		__dc_line_op(phy, sz, OP_FLUSH);
-		__ic_line_inv(phy, sz);
+		__ic_line_inv_vaddr(phy, kstart, sz);
 		local_irq_restore(flags);
 		kstart += sz;
 		tot_sz -= sz;
-- 
cgit v1.2.3-70-g09d2


From 94bad1afeeefbd1b27d7f642de12c04339501a99 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 12 Apr 2013 12:20:23 +0530
Subject: ARC: [mm] consolidate icache/dcache sync code

Now that we have same helper used for all icache invalidates (i.e.
vaddr+paddr based exact line invalidate), consolidate the open coded
calls into one place.

Also rename flush_icache_range_vaddr => __sync_icache_dcache

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/cacheflush.h |  5 ++---
 arch/arc/mm/cache_arc700.c        | 32 ++++++++++++++++----------------
 2 files changed, 18 insertions(+), 19 deletions(-)

(limited to 'arch/arc/mm')

diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
index 46f13e7314dc..65ed8d2d4597 100644
--- a/arch/arc/include/asm/cacheflush.h
+++ b/arch/arc/include/asm/cacheflush.h
@@ -31,8 +31,7 @@
 void flush_cache_all(void);
 
 void flush_icache_range(unsigned long start, unsigned long end);
-void flush_icache_range_vaddr(unsigned long paddr, unsigned long u_vaddr,
-				     int len);
+void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len);
 void __inv_icache_page(unsigned long paddr, unsigned long vaddr);
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
@@ -66,7 +65,7 @@ void dma_cache_wback(unsigned long start, unsigned long sz);
 do {									\
 	memcpy(dst, src, len);						\
 	if (vma->vm_flags & VM_EXEC)					\
-		flush_icache_range_vaddr((unsigned long)(dst), vaddr, len);\
+		__sync_icache_dcache((unsigned long)(dst), vaddr, len);	\
 } while (0)
 
 #define copy_from_user_page(vma, page, vaddr, dst, src, len)		\
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index 5651e7bd3b7e..5aaa955a3aac 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -652,7 +652,6 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
 {
 	unsigned int tot_sz, off, sz;
 	unsigned long phy, pfn;
-	unsigned long flags;
 
 	/* printk("Kernel Cache Cohenercy: %lx to %lx\n",kstart, kend); */
 
@@ -679,8 +678,7 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
 		 * given the callers for this case: kprobe/kgdb in built-in
 		 * kernel code only.
 		 */
-		__ic_line_inv_vaddr(kstart, kstart, kend - kstart);
-		__dc_line_op(kstart, kend - kstart, OP_FLUSH);
+		__sync_icache_dcache(kstart, kstart, kend - kstart);
 		return;
 	}
 
@@ -698,28 +696,30 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
 		pfn = vmalloc_to_pfn((void *)kstart);
 		phy = (pfn << PAGE_SHIFT) + off;
 		sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);
-		local_irq_save(flags);
-		__dc_line_op(phy, sz, OP_FLUSH);
-		__ic_line_inv_vaddr(phy, kstart, sz);
-		local_irq_restore(flags);
+		__sync_icache_dcache(phy, kstart, sz);
 		kstart += sz;
 		tot_sz -= sz;
 	}
 }
 
 /*
- * Optimised ver of flush_icache_range() with spec callers: ptrace/signals
- * where vaddr is also available. This allows passing both vaddr and paddr
- * bits to CDU for cache flush, short-circuting the current pessimistic algo
- * which kills all possible aliases.
- * An added adv of knowing that vaddr is user-vaddr avoids various checks
- * and handling for k-vaddr, k-paddr as done in orig ver above
+ * General purpose helper to make I and D cache lines consistent.
+ * @paddr is phy addr of region
+ * @vaddr is typically user or kernel vaddr (vmalloc)
+ *    Howver in one instance, flush_icache_range() by kprobe (for a breakpt in
+ *    builtin kernel code) @vaddr will be paddr only, meaning CDU operation will
+ *    use a paddr to index the cache (despite VIPT). This is fine since since a
+ *    built-in kernel page will not have any virtual mappings (not even kernel)
+ *    kprobe on loadable module is different as it will have kvaddr.
  */
-void flush_icache_range_vaddr(unsigned long paddr, unsigned long u_vaddr,
-			      int len)
+void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len)
 {
-	__ic_line_inv_vaddr(paddr, u_vaddr, len);
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__ic_line_inv_vaddr(paddr, vaddr, len);
 	__dc_line_op(paddr, len, OP_FLUSH);
+	local_irq_restore(flags);
 }
 
 /* wrapper to compile time eliminate alignment checks in flush loop */
-- 
cgit v1.2.3-70-g09d2


From 7f250a0fa1cc7f8d97560f4ea36eae38c17eb648 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 12 Apr 2013 13:08:06 +0530
Subject: ARC: [mm] remove the pessimistic all-alias-invalidate icache helpers

No users of this code anymore - so RIP !

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/cache_arc700.c | 201 ++++-----------------------------------------
 1 file changed, 17 insertions(+), 184 deletions(-)

(limited to 'arch/arc/mm')

diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index 5aaa955a3aac..da9de401681d 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -72,16 +72,6 @@
 #include <asm/cachectl.h>
 #include <asm/setup.h>
 
-
-#ifdef CONFIG_ARC_HAS_ICACHE
-static void __ic_line_inv_no_alias(unsigned long, int);
-static void __ic_line_inv_2_alias(unsigned long, int);
-static void __ic_line_inv_4_alias(unsigned long, int);
-
-/* Holds the ptr to flush routine, dependign on size due to aliasing issues */
-static void (*___flush_icache_rtn) (unsigned long, int);
-#endif
-
 char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len)
 {
 	int n = 0;
@@ -171,30 +161,6 @@ void __cpuinit arc_cache_init(void)
 
 	}
 #endif
-
-	/*
-	 * if Cache way size is <= page size then no aliasing exhibited
-	 * otherwise ratio determines num of aliases.
-	 * e.g. 32K I$, 2 way set assoc, 8k pg size
-	 *       way-sz = 32k/2 = 16k
-	 *       way-pg-ratio = 16k/8k = 2, so 2 aliases possible
-	 *       (meaning 1 line could be in 2 possible locations).
-	 */
-	way_pg_ratio = ic->sz / ARC_ICACHE_WAYS / PAGE_SIZE;
-	switch (way_pg_ratio) {
-	case 0:
-	case 1:
-		___flush_icache_rtn = __ic_line_inv_no_alias;
-		break;
-	case 2:
-		___flush_icache_rtn = __ic_line_inv_2_alias;
-		break;
-	case 4:
-		___flush_icache_rtn = __ic_line_inv_4_alias;
-		break;
-	default:
-		panic("Unsupported I-Cache Sz\n");
-	}
 #endif
 
 	/* Enable/disable I-Cache */
@@ -391,75 +357,38 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz,
 /*
  *		I-Cache Aliasing in ARC700 VIPT caches
  *
- * For fetching code from I$, ARC700 uses vaddr (embedded in program code)
- * to "index" into SET of cache-line and paddr from MMU to match the TAG
- * in the WAYS of SET.
- *
- * However the CDU iterface (to flush/inv) lines from software, only takes
- * paddr (to have simpler hardware interface). For simpler cases, using paddr
- * alone suffices.
- * e.g. 2-way-set-assoc, 16K I$ (8k MMU pg sz, 32b cache line size):
- *      way_sz = cache_sz / num_ways = 16k/2 = 8k
- *      num_sets = way_sz / line_sz = 8k/32 = 256 => 8 bits
- *   Ignoring the bottom 5 bits corresp to the off within a 32b cacheline,
- *   bits req for calc set-index = bits 12:5 (0 based). Since this range fits
- *   inside the bottom 13 bits of paddr, which are same for vaddr and paddr
- *   (with 8k pg sz), paddr alone can be safely used by CDU to unambigously
- *   locate a cache-line.
+ * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
+ * The orig Cache Management Module "CDU" only required paddr to invalidate a
+ * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
+ * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
+ * the exact same line.
  *
- * However for a difft sized cache, say 32k I$, above math yields need
- * for 14 bits of vaddr to locate a cache line, which can't be provided by
- * paddr, since the bit 13 (0 based) might differ between the two.
- *
- * This lack of extra bits needed for correct line addressing, defines the
- * classical problem of Cache aliasing with VIPT architectures
- * num_aliases = 1 << extra_bits
- * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz => 2 aliases
- *      2-way-set-assoc, 64K I$ with 8k MMU pg sz => 4 aliases
- *      2-way-set-assoc, 16K I$ with 8k MMU pg sz => NO aliases
+ * However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
+ * paddr alone could not be used to correctly index the cache.
  *
  * ------------------
  * MMU v1/v2 (Fixed Page Size 8k)
  * ------------------
  * The solution was to provide CDU with these additonal vaddr bits. These
- * would be bits [x:13], x would depend on cache-geom.
+ * would be bits [x:13], x would depend on cache-geometry, 13 comes from
+ * standard page size of 8k.
  * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
  * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
  * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
  * represent the offset within cache-line. The adv of using this "clumsy"
- * interface for additional info was no new reg was needed in CDU.
+ * interface for additional info was no new reg was needed in CDU programming
+ * model.
  *
  * 17:13 represented the max num of bits passable, actual bits needed were
  * fewer, based on the num-of-aliases possible.
  * -for 2 alias possibility, only bit 13 needed (32K cache)
  * -for 4 alias possibility, bits 14:13 needed (64K cache)
  *
- * Since vaddr was not available for all instances of I$ flush req by core
- * kernel, the only safe way (non-optimal though) was to kill all possible
- * lines which could represent an alias (even if they didnt represent one
- * in execution).
- * e.g. for 64K I$, 4 aliases possible, so we did
- *      flush start
- *      flush start | 0x01
- *      flush start | 0x2
- *      flush start | 0x3
- *
- * The penalty was invoking the operation itself, since tag match is anyways
- * paddr based, a line which didn't represent an alias would not match the
- * paddr, hence wont be killed
- *
- * Note that aliasing concerns are independent of line-sz for a given cache
- * geometry (size + set_assoc) because the extra bits required by line-sz are
- * reduced from the set calc.
- * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz and using math above
- *  32b line-sz: 9 bits set-index-calc, 5 bits offset-in-line => 1 extra bit
- *  64b line-sz: 8 bits set-index-calc, 6 bits offset-in-line => 1 extra bit
- *
  * ------------------
  * MMU v3
  * ------------------
- * This ver of MMU supports var page sizes (1k-16k) - Linux will support
- * 8k (default), 16k and 4k.
+ * This ver of MMU supports variable page sizes (1k-16k): although Linux will
+ * only support 8k (default), 16k and 4k.
  * However from hardware perspective, smaller page sizes aggrevate aliasing
  * meaning more vaddr bits needed to disambiguate the cache-line-op ;
  * the existing scheme of piggybacking won't work for certain configurations.
@@ -468,105 +397,10 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz,
  */
 
 /***********************************************************
- * Machine specific helpers for per line I-Cache invalidate.
- * 3 routines to accpunt for 1, 2, 4 aliases possible
- */
-
-static void __ic_line_inv_no_alias(unsigned long start, int num_lines)
-{
-	while (num_lines-- > 0) {
-#if (CONFIG_ARC_MMU_VER > 2)
-		write_aux_reg(ARC_REG_IC_PTAG, start);
-#endif
-		write_aux_reg(ARC_REG_IC_IVIL, start);
-		start += ARC_ICACHE_LINE_LEN;
-	}
-}
-
-static void __ic_line_inv_2_alias(unsigned long start, int num_lines)
-{
-	while (num_lines-- > 0) {
-
-#if (CONFIG_ARC_MMU_VER > 2)
-		/*
-		 *  MMU v3, CDU prog model (for line ops) now uses a new IC_PTAG
-		 * reg to pass the "tag" bits and existing IVIL reg only looks
-		 * at bits relevant for "index" (details above)
-		 * Programming Notes:
-		 * -when writing tag to PTAG reg, bit chopping can be avoided,
-		 *  CDU ignores non-tag bits.
-		 * -Ideally "index" must be computed from vaddr, but it is not
-		 *  avail in these rtns. So to be safe, we kill the lines in all
-		 *  possible indexes corresp to num of aliases possible for
-		 *  given cache config.
-		 */
-		write_aux_reg(ARC_REG_IC_PTAG, start);
-		write_aux_reg(ARC_REG_IC_IVIL,
-				  start & ~(0x1 << PAGE_SHIFT));
-		write_aux_reg(ARC_REG_IC_IVIL, start | (0x1 << PAGE_SHIFT));
-#else
-		write_aux_reg(ARC_REG_IC_IVIL, start);
-		write_aux_reg(ARC_REG_IC_IVIL, start | 0x01);
-#endif
-		start += ARC_ICACHE_LINE_LEN;
-	}
-}
-
-static void __ic_line_inv_4_alias(unsigned long start, int num_lines)
-{
-	while (num_lines-- > 0) {
-
-#if (CONFIG_ARC_MMU_VER > 2)
-		write_aux_reg(ARC_REG_IC_PTAG, start);
-
-		write_aux_reg(ARC_REG_IC_IVIL,
-				  start & ~(0x3 << PAGE_SHIFT));
-		write_aux_reg(ARC_REG_IC_IVIL,
-				  start & ~(0x2 << PAGE_SHIFT));
-		write_aux_reg(ARC_REG_IC_IVIL,
-				  start & ~(0x1 << PAGE_SHIFT));
-		write_aux_reg(ARC_REG_IC_IVIL, start | (0x3 << PAGE_SHIFT));
-#else
-		write_aux_reg(ARC_REG_IC_IVIL, start);
-		write_aux_reg(ARC_REG_IC_IVIL, start | 0x01);
-		write_aux_reg(ARC_REG_IC_IVIL, start | 0x02);
-		write_aux_reg(ARC_REG_IC_IVIL, start | 0x03);
-#endif
-		start += ARC_ICACHE_LINE_LEN;
-	}
-}
-
-static void __ic_line_inv(unsigned long start, unsigned long sz)
-{
-	unsigned long flags;
-	int num_lines, slack;
-
-	/*
-	 * Ensure we properly floor/ceil the non-line aligned/sized requests
-	 * and have @start - aligned to cache line, and integral @num_lines
-	 * However page sized flushes can be compile time optimised.
-	 *  -@start will be cache-line aligned already (being page aligned)
-	 *  -@sz will be integral multiple of line size (being page sized).
-	 */
-	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
-		slack = start & ~ICACHE_LINE_MASK;
-		sz += slack;
-		start -= slack;
-	}
-
-	num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
-
-	local_irq_save(flags);
-	(*___flush_icache_rtn) (start, num_lines);
-	local_irq_restore(flags);
-}
-
-/* Unlike routines above, having vaddr for flush op (along with paddr),
- * prevents the need to speculatively kill the lines in multiple sets
- * based on ratio of way_sz : pg_sz
+ * Machine specific helper for per line I-Cache invalidate.
  */
-static void __ic_line_inv_vaddr(unsigned long phy_start,
-					 unsigned long vaddr, unsigned long sz)
+static void __ic_line_inv_vaddr(unsigned long phy_start, unsigned long vaddr,
+				unsigned long sz)
 {
 	unsigned long flags;
 	int num_lines, slack;
@@ -595,7 +429,7 @@ static void __ic_line_inv_vaddr(unsigned long phy_start,
 		write_aux_reg(ARC_REG_IC_IVIL, vaddr);
 		vaddr += ARC_ICACHE_LINE_LEN;
 #else
-		/* this paddr contains vaddrs bits as needed */
+		/* paddr contains stuffed vaddrs bits */
 		write_aux_reg(ARC_REG_IC_IVIL, addr);
 #endif
 		addr += ARC_ICACHE_LINE_LEN;
@@ -605,7 +439,6 @@ static void __ic_line_inv_vaddr(unsigned long phy_start,
 
 #else
 
-#define __ic_line_inv(start, sz)
 #define __ic_line_inv_vaddr(pstart, vstart, sz)
 
 #endif /* CONFIG_ARC_HAS_ICACHE */
-- 
cgit v1.2.3-70-g09d2


From 764531cc5a9261de9f74fd85c30dfb7837af2797 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 12 Apr 2013 15:32:06 +0530
Subject: ARC: [mm] micro-optimize page size icache invalidate

The start address is already page aligned and the size is a constant
PAGE_SIZE, thus fixups for alignment are not needed in generated code.

bloat-o-meter vmlinux-mm5 vmlinux
add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-32 (-32)
function                                     old     new   delta
__inv_icache_page                             82      50     -32

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/cache_arc700.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'arch/arc/mm')

diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index da9de401681d..3a9ef63b1a97 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -406,9 +406,18 @@ static void __ic_line_inv_vaddr(unsigned long phy_start, unsigned long vaddr,
 	int num_lines, slack;
 	unsigned int addr;
 
-	slack = phy_start & ~ICACHE_LINE_MASK;
-	sz += slack;
-	phy_start -= slack;
+	/*
+	 * Ensure we properly floor/ceil the non-line aligned/sized requests:
+	 * However page sized flushes can be compile time optimised.
+	 *  -@phy_start will be cache-line aligned already (being page aligned)
+	 *  -@sz will be integral multiple of line size (being page sized).
+	 */
+	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
+		slack = phy_start & ~ICACHE_LINE_MASK;
+		sz += slack;
+		phy_start -= slack;
+	}
+
 	num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
 
 #if (CONFIG_ARC_MMU_VER > 2)
-- 
cgit v1.2.3-70-g09d2


From eacd0e950dc2100af54f2a94ae29105bf48ab921 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Tue, 16 Apr 2013 14:10:48 +0530
Subject: ARC: [mm] Lazy D-cache flush (non aliasing VIPT)

flush_dcache_page( ) is MM hook to ensure that a page has consistent
views between kernel and userspace. Thus it is called when

* kernel writes to a page which at some later point could get mapped to
  userspace (so kernel mapping needs to be flushed-n-inv)
* kernel is about to read from a page with possible userspace mappings
  (so userspace mappings need to be made coherent with kernel ones)

However for Non aliasing VIPT dcache, any userspace mapping will always
be congruent to kernel mapping. Thus d-cache need not be flushed at
all (or the flush can be delayed indefinitely).

The only reason it does need to be flushed is when mapping code pages.
Since icache doesn't snoop dcache, those dirty dcache lines need to be
written back to memory and the icache lines invalidated so that
subsequent icache line fetches get the right data.

Decent gains on LMBench fork/exec/sh and File I/O micro-benchmarks.

(1) FPGA @ 80 MHZ

Processor, Processes - times in microseconds - smaller is better
------------------------------------------------------------------------------
Host                 OS  Mhz null null      open slct sig  sig  fork exec sh
                             call  I/O stat clos TCP  inst hndl proc proc proc
--------- ------------- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
3.9-rc6-a Linux 3.9.0-r   80 4.79 8.72 66.7 116. 239. 8.39 30.4 4798 14.K 34.K
3.9-rc6-b Linux 3.9.0-r   80 4.79 8.62 65.4 111. 239. 8.35 29.0 3995 12.K 30.K
3.9-rc7-c Linux 3.9.0-r   80 4.79 9.00 66.1 106. 239. 8.61 30.4 2858 10.K 24.K
                                                                ^^^^ ^^^^ ^^^

File & VM system latencies in microseconds - smaller is better
-------------------------------------------------------------------------------
Host                 OS   0K File      10K File     Mmap    Prot   Page 100fd
                        Create Delete Create Delete Latency Fault  Fault selct
--------- ------------- ------ ------ ------ ------ ------- ----- ------- -----
3.9-rc6-a Linux 3.9.0-r  317.8  204.2 1122.3  375.1 3522.0 4.288     20.7 126.8
3.9-rc6-b Linux 3.9.0-r  298.7  223.0 1141.6  367.8 3531.0 4.866     20.9 126.4
3.9-rc7-c Linux 3.9.0-r  278.4  179.2  862.1  339.3 3705.0 3.223     20.3 126.6
                         ^^^^^  ^^^^^  ^^^^^  ^^^^

(2) Customer Silicon @ 500 MHz (166 MHz mem)

------------------------------------------------------------------------------
Host                 OS  Mhz null null      open slct sig  sig  fork exec sh
                             call  I/O stat clos TCP  inst hndl proc proc proc
--------- ------------- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
abilis-ba Linux 3.9.0-r  497 0.71 1.38 4.58 12.0 35.5 1.40 3.89 2070 5525 13.K
abilis-ca Linux 3.9.0-r  497 0.71 1.40 4.61 11.8 35.6 1.37 3.92 1411 4317 10.K
                                                                ^^^^ ^^^^ ^^^

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/cacheflush.h |  1 +
 arch/arc/mm/cache_arc700.c        |  9 +++++++--
 arch/arc/mm/tlb.c                 | 18 +++++++++++++-----
 3 files changed, 21 insertions(+), 7 deletions(-)

(limited to 'arch/arc/mm')

diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
index 65ed8d2d4597..ee1f6eae82d2 100644
--- a/arch/arc/include/asm/cacheflush.h
+++ b/arch/arc/include/asm/cacheflush.h
@@ -33,6 +33,7 @@ void flush_cache_all(void);
 void flush_icache_range(unsigned long start, unsigned long end);
 void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len);
 void __inv_icache_page(unsigned long paddr, unsigned long vaddr);
+void __flush_dcache_page(unsigned long paddr);
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index 3a9ef63b1a97..c854cf95f706 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -457,10 +457,10 @@ static void __ic_line_inv_vaddr(unsigned long phy_start, unsigned long vaddr,
  * Exported APIs
  */
 
-/* TBD: use pg_arch_1 to optimize this */
 void flush_dcache_page(struct page *page)
 {
-	__dc_line_op((unsigned long)page_address(page), PAGE_SIZE, OP_FLUSH);
+	/* Make a note that dcache is not yet flushed for this page */
+	set_bit(PG_arch_1, &page->flags);
 }
 EXPORT_SYMBOL(flush_dcache_page);
 
@@ -570,6 +570,11 @@ void __inv_icache_page(unsigned long paddr, unsigned long vaddr)
 	__ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
 }
 
+void __flush_dcache_page(unsigned long paddr)
+{
+	__dc_line_op(paddr, PAGE_SIZE, OP_FLUSH_N_INV);
+}
+
 void flush_icache_all(void)
 {
 	unsigned long flags;
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 086be526072a..003d69ac6ffa 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -418,9 +418,10 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 	local_irq_restore(flags);
 }
 
-/* arch hook called by core VM at the end of handle_mm_fault( ),
- * when a new PTE is entered in Page Tables or an existing one
- * is modified. We aggresively pre-install a TLB entry
+/*
+ * Called at the end of pagefault, for a userspace mapped page
+ *  -pre-install the corresponding TLB entry into MMU
+ *  -Finalize the delayed D-cache flush (wback+inv kernel mapping)
  */
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
 		      pte_t *ptep)
@@ -431,8 +432,15 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
 
 	/* icache doesn't snoop dcache, thus needs to be made coherent here */
 	if (vma->vm_flags & VM_EXEC) {
-		unsigned long paddr =  pte_val(*ptep) & PAGE_MASK;
-		__inv_icache_page(paddr, vaddr);
+		struct page *page = pfn_to_page(pte_pfn(*ptep));
+
+		/* if page was dcache dirty, flush now */
+		int dirty = test_and_clear_bit(PG_arch_1, &page->flags);
+		if (dirty) {
+			unsigned long paddr =  pte_val(*ptep) & PAGE_MASK;
+			__flush_dcache_page(paddr);
+			__inv_icache_page(paddr, vaddr);
+		}
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2