From bb6d59ca927d855ffac567b35c0a790c67016103 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 11 Mar 2009 23:33:18 +0900 Subject: x86: unify kmap_atomic_pfn() and iomap_atomic_prot_pfn() kmap_atomic_pfn() and iomap_atomic_prot_pfn() are almost the same except for the pgprot argument. This patch removes the code duplication between the two functions. Signed-off-by: Akinobu Mita LKML-Reference: <20090311143317.GA22244@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/mm/highmem_32.c | 17 +++++++++++------ arch/x86/mm/iomap_32.c | 13 ++----------- 2 files changed, 13 insertions(+), 17 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index d11745334a67..ae4c8dae2669 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -121,23 +121,28 @@ void kunmap_atomic(void *kvaddr, enum km_type type) pagefault_enable(); } -/* This is the same as kmap_atomic() but can map memory that doesn't - * have a struct page associated with it. - */ -void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) { enum fixed_addresses idx; unsigned long vaddr; pagefault_disable(); - idx = type + KM_TYPE_NR*smp_processor_id(); + idx = type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); + set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); arch_flush_lazy_mmu_mode(); return (void*) vaddr; } + +/* This is the same as kmap_atomic() but can map memory that doesn't + * have a struct page associated with it. + */ +void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +{ + return kmap_atomic_prot_pfn(pfn, type, kmap_prot); +} EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ struct page *kmap_atomic_to_page(void *ptr) diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 04102d42ff42..592984e5496b 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -18,6 +18,7 @@ #include <asm/iomap.h> #include <asm/pat.h> +#include <asm/highmem.h> #include <linux/module.h> int is_io_mapping_possible(resource_size_t base, unsigned long size) @@ -36,11 +37,6 @@ EXPORT_SYMBOL_GPL(is_io_mapping_possible); void * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) { - enum fixed_addresses idx; - unsigned long vaddr; - - pagefault_disable(); - /* * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the @@ -50,12 +46,7 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) prot = PAGE_KERNEL_UC_MINUS; - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte-idx, pfn_pte(pfn, prot)); - arch_flush_lazy_mmu_mode(); - - return (void*) vaddr; + return kmap_atomic_prot_pfn(pfn, type, prot); } EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); -- cgit v1.2.3-70-g09d2 From 12074fa1073013dd11f1cff41db018d5cff4ecd9 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 11 Mar 2009 23:34:50 +0900 Subject: x86: debug check for kmap_atomic_pfn() and iomap_atomic_prot_pfn() It may be useful for kmap_atomic_pfn() and iomap_atomic_prot_pfn() to check for invalid kmap usage, just as kmap_atomic() does. 
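For context, a minimal caller of these interfaces might look like the sketch below. It is illustrative only: the pfn, the KM_USER0 slot and the length are assumptions, not taken from the patches above, and it uses the km_type-based API of this kernel generation.

/*
 * Sketch: atomically map a PFN that has no struct page behind it
 * (e.g. an MMIO page), copy a few bytes out, then unmap.
 */
#include <linux/highmem.h>
#include <linux/string.h>

static void copy_from_raw_pfn(void *dst, unsigned long pfn, size_t len)
{
	void *src = kmap_atomic_pfn(pfn, KM_USER0);	/* pagefaults now disabled */

	memcpy(dst, src, len);				/* len must stay <= PAGE_SIZE */
	kunmap_atomic(src, KM_USER0);			/* re-enables pagefaults */
}
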
Signed-off-by: Akinobu Mita LKML-Reference: <20090311143449.GB22244@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/mm/highmem_32.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index ae4c8dae2669..f256e73542d7 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -128,6 +128,8 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) pagefault_disable(); + debug_kmap_atomic_prot(type); + idx = type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); -- cgit v1.2.3-70-g09d2 From afcfe024aebd74b0984a41af9a34e009cf5badaf Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Wed, 11 Mar 2009 20:29:45 +0000 Subject: x86: mmiotrace: quieten spurious warning message This message was being incorrectly emitted when using gdb, so compile it out by default for now; there will be a better fix in v2.6.30. Reported-by: Ingo Molnar Signed-off-by: Stuart Bennett Acked-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 6a518dd08a36..4f115e00486b 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -310,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) { - pr_warning("kmmio: spurious debug trap on CPU %d.\n", + pr_debug("kmmio: spurious debug trap on CPU %d.\n", smp_processor_id()); goto out; } -- cgit v1.2.3-70-g09d2 From dd63fdcc63f0f853b116b52e56200a0e0227cf5f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 13 Mar 2009 03:20:49 +0100 Subject: x86: unify kmap_atomic_pfn() and iomap_atomic_prot_pfn(), fix Impact: build fix Move kmap_atomic_prot_pfn() to iomap_32.c. It is used on all 32-bit kernels, while highmem_32.c is only built on highmem kernels. ( Note: the debug_kmap_atomic_prot() check is removed for now, that problem is handled via another patch. ) Reported-by: Thomas Gleixner Cc: Akinobu Mita LKML-Reference: <20090311143317.GA22244@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/mm/highmem_32.c | 20 ++------------------ arch/x86/mm/iomap_32.c | 18 +++++++++++++++++- 2 files changed, 19 insertions(+), 19 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index f256e73542d7..522db5e3d0bf 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -121,24 +121,8 @@ void kunmap_atomic(void *kvaddr, enum km_type type) pagefault_enable(); } -void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) -{ - enum fixed_addresses idx; - unsigned long vaddr; - - pagefault_disable(); - - debug_kmap_atomic_prot(type); - - idx = type + KM_TYPE_NR * smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); - arch_flush_lazy_mmu_mode(); - - return (void*) vaddr; -} - -/* This is the same as kmap_atomic() but can map memory that doesn't +/* + * This is the same as kmap_atomic() but can map memory that doesn't * have a struct page associated with it. 
*/ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 592984e5496b..6e60ba698cee 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -32,7 +32,23 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size) } EXPORT_SYMBOL_GPL(is_io_mapping_possible); -/* Map 'pfn' using fixed map 'type' and protections 'prot' +void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) +{ + enum fixed_addresses idx; + unsigned long vaddr; + + pagefault_disable(); + + idx = type + KM_TYPE_NR * smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); + arch_flush_lazy_mmu_mode(); + + return (void *)vaddr; +} + +/* + * Map 'pfn' using fixed map 'type' and protections 'prot' */ void * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) -- cgit v1.2.3-70-g09d2 From 13c6c53282d99c82e79b02477efd2c1e30a991ef Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 12 Mar 2009 12:37:34 +0000 Subject: x86, 32-bit: also use cpuinfo_x86's x86_{phys,virt}_bits members Impact: 32/64-bit consolidation In a first step, this allows fixing phys_addr_valid() for PAE (which until now reported all addresses to be valid). Subsequently, this will also allow simplifying some MTRR handling code. Signed-off-by: Jan Beulich LKML-Reference: <49B9101E.76E4.0078.0@novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/cpu/common.c | 12 +++++++++++- arch/x86/kernel/cpu/intel.c | 5 +++++ arch/x86/mm/ioremap.c | 17 ++++++++--------- 4 files changed, 25 insertions(+), 11 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 76139506c3e4..bd3406db1d68 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -75,9 +75,9 @@ struct cpuinfo_x86 { #else /* Number of 4K pages in DTLB/ITLB combined(in pages): */ int x86_tlbsize; +#endif __u8 x86_virt_bits; __u8 x86_phys_bits; -#endif /* CPUID returned core id bits: */ __u8 x86_coreid_bits; /* Max extended CPUID function supported: */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 826d5c876278..a95e9480bb9c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -549,13 +549,15 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) } } -#ifdef CONFIG_X86_64 if (c->extended_cpuid_level >= 0x80000008) { u32 eax = cpuid_eax(0x80000008); c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; } +#ifdef CONFIG_X86_32 + else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) + c->x86_phys_bits = 36; #endif if (c->extended_cpuid_level >= 0x80000007) @@ -602,8 +604,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; + c->x86_phys_bits = 36; + c->x86_virt_bits = 48; #else c->x86_clflush_size = 32; + c->x86_phys_bits = 32; + c->x86_virt_bits = 32; #endif c->x86_cache_alignment = c->x86_clflush_size; @@ -726,9 +732,13 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_coreid_bits = 0; #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; + c->x86_phys_bits = 36; + c->x86_virt_bits = 48; #else c->cpuid_level = -1; /* CPUID not detected */ c->x86_clflush_size = 32; + c->x86_phys_bits = 32; + c->x86_virt_bits = 32; #endif c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof 
c->x86_capability); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 191117f1ad51..ae769471042e 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -54,6 +54,11 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) c->x86_cache_alignment = 128; #endif + /* CPUID workaround for 0F33/0F34 CPU */ + if (c->x86 == 0xF && c->x86_model == 0x3 + && (c->x86_mask == 0x3 || c->x86_mask == 0x4)) + c->x86_phys_bits = 36; + /* * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate * with P/T states and does not stop in deep C-states diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index aca924a30ee6..83ed74affba9 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -22,13 +22,17 @@ #include <asm/pgalloc.h> #include <asm/pat.h> -#ifdef CONFIG_X86_64 - -static inline int phys_addr_valid(unsigned long addr) +static inline int phys_addr_valid(resource_size_t addr) { - return addr < (1UL << boot_cpu_data.x86_phys_bits); +#ifdef CONFIG_PHYS_ADDR_T_64BIT + return !(addr >> boot_cpu_data.x86_phys_bits); +#else + return 1; +#endif } +#ifdef CONFIG_X86_64 + unsigned long __phys_addr(unsigned long x) { if (x >= __START_KERNEL_map) { @@ -65,11 +69,6 @@ EXPORT_SYMBOL(__virt_addr_valid); #else -static inline int phys_addr_valid(unsigned long addr) -{ - return 1; -} - #ifdef CONFIG_DEBUG_VIRTUAL unsigned long __phys_addr(unsigned long x) { -- cgit v1.2.3-70-g09d2 From dc9dd5cc854cde110d2421f3a11fec7597e059c1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 12 Mar 2009 12:40:06 +0000 Subject: x86: move save_mr() into .meminit.text Impact: cleanup, save memory The function is only being called from boot or memory hotplug paths. Signed-off-by: Jan Beulich LKML-Reference: <49B910B6.76E4.0078.0@novell.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 15219e0d1243..fd3da1dda1c9 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -94,9 +94,9 @@ struct map_range { #define NR_RANGE_MR 5 #endif -static int save_mr(struct map_range *mr, int nr_range, - unsigned long start_pfn, unsigned long end_pfn, - unsigned long page_size_mask) +static int __meminit save_mr(struct map_range *mr, int nr_range, - unsigned long start_pfn, unsigned long end_pfn, - unsigned long page_size_mask) +static int __meminit save_mr(struct map_range *mr, int nr_range, + unsigned long start_pfn, unsigned long end_pfn, + unsigned long page_size_mask) { if (start_pfn < end_pfn) { if (nr_range >= NR_RANGE_MR) -- cgit v1.2.3-70-g09d2 From 698609bdcd35d0641f4c6622c83680ab1a6d67cb Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 12 Mar 2009 13:11:50 +0000 Subject: x86: create a non-zero sized bm_pte only when needed Impact: kernel image size reduction In most configurations, the pmd page needed here maps the same range of virtual addresses as the one inserted earlier to cover FIX_DBGP_BASE; in that case the page (and its insertion into the page tables) can be avoided altogether by detecting the condition at compile time. 
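The compile-time trick this relies on, reduced to a standalone sketch; COND stands in for the fixmap/pmd comparison in the patch and is not code from it:

/*
 * Size a static array to zero when a build-time condition proves it
 * redundant; a zero-length array (a GCC extension the kernel relies
 * on) occupies no space in the image.
 */
#define COND 0				/* illustrative build-time condition */

static char bm_buf[COND ? 4096 : 0];

static int bm_buf_present(void)
{
	return sizeof(bm_buf) != 0;	/* folds to a compile-time constant */
}
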
Signed-off-by: Jan Beulich LKML-Reference: <49B91826.76E4.0078.0@novell.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 83ed74affba9..55e127f71ed9 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -487,7 +487,12 @@ static int __init early_ioremap_debug_setup(char *str) early_param("early_ioremap_debug", early_ioremap_debug_setup); static __initdata int after_paging_init; -static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; +#define __FIXADDR_TOP (-PAGE_SIZE) +static pte_t bm_pte[(__fix_to_virt(FIX_DBGP_BASE) + ^ __fix_to_virt(FIX_BTMAP_BEGIN)) >> PMD_SHIFT + ? PAGE_SIZE / sizeof(pte_t) : 0] __page_aligned_bss; +#undef __FIXADDR_TOP +static __initdata pte_t *bm_ptep; static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) { @@ -502,6 +507,8 @@ static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) static inline pte_t * __init early_ioremap_pte(unsigned long addr) { + if (!sizeof(bm_pte)) + return &bm_ptep[pte_index(addr)]; return &bm_pte[pte_index(addr)]; } @@ -519,8 +526,14 @@ void __init early_ioremap_init(void) slot_virt[i] = fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); - memset(bm_pte, 0, sizeof(bm_pte)); - pmd_populate_kernel(&init_mm, pmd, bm_pte); + if (sizeof(bm_pte)) { + memset(bm_pte, 0, sizeof(bm_pte)); + pmd_populate_kernel(&init_mm, pmd, bm_pte); + } else { + bm_ptep = pte_offset_kernel(pmd, 0); + if (early_ioremap_debug) + printk(KERN_INFO "bm_ptep=%p\n", bm_ptep); + } /* * The boot-ioremap range spans multiple pmds, for which -- cgit v1.2.3-70-g09d2 From 4bb9c5c02153dfc89a6c73a6f32091413805ad7d Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Thu, 12 Mar 2009 17:45:27 -0700 Subject: VM, x86, PAT: Change is_linear_pfn_mapping to not use vm_pgoff Impact: fix false positive PAT warnings - also fix VirtualBox hang Use of vma->vm_pgoff to identify the pfnmaps that are fully mapped at mmap time is broken. vm_pgoff is set by generic mmap code even for cases where drivers are setting up the mappings at the fault time. The problem was originally reported here: http://marc.info/?l=linux-kernel&m=123383810628583&w=2 Change is_linear_pfn_mapping logic to overload VM_INSERTPAGE flag along with VM_PFNMAP to mean full PFNMAP setup at mmap time. Problem also tracked at: http://bugzilla.kernel.org/show_bug.cgi?id=12800 Reported-by: Thomas Hellstrom Tested-by: Frans Pop Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Cc: Nick Piggin Cc: "ebiederm@xmission.com" Cc: # only for 2.6.29.1, not .28 LKML-Reference: <20090313004527.GA7176@linux-os.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 5 +++-- include/linux/mm.h | 15 +++++++++++++-- mm/memory.c | 6 ++++-- 3 files changed, 20 insertions(+), 6 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index e0ab173b6974..21bc1f787ae2 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -641,10 +641,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, is_ram = pat_pagerange_is_ram(paddr, paddr + size); /* - * reserve_pfn_range() doesn't support RAM pages. + * reserve_pfn_range() doesn't support RAM pages. Maintain the current + * behavior with RAM pages by returning success. 
*/ if (is_ram != 0) - return -EINVAL; + return 0; ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); if (ret) diff --git a/include/linux/mm.h b/include/linux/mm.h index 065cdf8c09fb..3daa05feed9f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -98,7 +98,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ -#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ +#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it. See the note at VM_PFNMAP_AT_MMAP below */ #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ @@ -126,6 +126,17 @@ extern unsigned int kobjsize(const void *objp); */ #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) +/* + * pfnmap vmas that are fully mapped at mmap time (not mapped on fault). + * Used by x86 PAT to identify such PFNMAP mappings and optimize their handling. + * Note that the VM_INSERTPAGE flag is overloaded here, i.e.: + * VM_INSERTPAGE && !VM_PFNMAP implies + * The vma has had "vm_insert_page()" done on it + * VM_INSERTPAGE && VM_PFNMAP implies + * The vma is PFNMAP with full mapping at mmap time + */ +#define VM_PFNMAP_AT_MMAP (VM_INSERTPAGE | VM_PFNMAP) + /* * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. */ @@ -145,7 +156,7 @@ extern pgprot_t protection_map[16]; */ static inline int is_linear_pfn_mapping(struct vm_area_struct *vma) { - return ((vma->vm_flags & VM_PFNMAP) && vma->vm_pgoff); + return ((vma->vm_flags & VM_PFNMAP_AT_MMAP) == VM_PFNMAP_AT_MMAP); } static inline int is_pfn_mapping(struct vm_area_struct *vma) diff --git a/mm/memory.c b/mm/memory.c index baa999e87cd2..d7df5babcba9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1665,9 +1665,10 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, * behaviour that some programs depend on. We mark the "original" * un-COW'ed pages by matching them up with "vma->vm_pgoff". */ - if (addr == vma->vm_start && end == vma->vm_end) + if (addr == vma->vm_start && end == vma->vm_end) { vma->vm_pgoff = pfn; - else if (is_cow_mapping(vma->vm_flags)) + vma->vm_flags |= VM_PFNMAP_AT_MMAP; + } else if (is_cow_mapping(vma->vm_flags)) return -EINVAL; vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; @@ -1679,6 +1680,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, * needed from higher level routine calling unmap_vmas */ vma->vm_flags &= ~(VM_IO | VM_RESERVED | VM_PFNMAP); + vma->vm_flags &= ~VM_PFNMAP_AT_MMAP; return -EINVAL; } -- cgit v1.2.3-70-g09d2 From 93dbda7cbcd70a0bd1a99f39f44a9ccde8ab9040 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 26 Feb 2009 17:35:44 -0800 Subject: x86: add brk allocation for very, very early allocations Impact: new interface Add a brk()-like allocator which effectively extends the bss in order to allow very early code to do dynamic allocations. This is better than using statically allocated arrays for data in subsystems which may never get used. The space for brk allocations is in the bss ELF segment, so that the space is mapped properly by the code which maps the kernel, and so that bootloaders keep the space free rather than putting a ramdisk or something into it. 
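(As an aside, a hypothetical early-boot user of the new interface could look like the sketch below; the size, alignment and caller are illustrative assumptions. extend_brk() itself appears in the diff further down.)

/*
 * Sketch: grab zeroed, page-aligned scratch memory before the bootmem
 * and page allocators are up.  Must run before reserve_brk() locks the
 * area down, or the BUG_ON(_brk_start == 0) in extend_brk() fires.
 */
#include <asm/page.h>
#include <asm/setup.h>

static __initdata void *early_scratch;

void __init grab_early_scratch(void)
{
	early_scratch = extend_brk(2 * PAGE_SIZE, PAGE_SIZE);
}
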
The bss itself, delimited by __bss_stop, ends before the brk area (__brk_base to __brk_limit). The kernel text, data and bss are reserved up to __bss_stop. Any brk-allocated data is reserved separately just before the kernel pagetable is built, as that code allocates from unreserved spaces in the e820 map, potentially allocating from any unused brk memory. Ultimately any unused memory in the brk area is used in the general kernel memory pool. Initially the brk space is set to 1MB, which is probably much larger than any user needs (the largest current user is i386 head_32.S's code to build the pagetables to map the kernel, which can get fairly large with a big kernel image and no PSE support). So long as the system has sufficient memory for the bootloader to reserve the kernel+1MB brk, there are no bad effects resulting from an over-large brk. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/sections.h | 7 +++++++ arch/x86/include/asm/setup.h | 4 ++++ arch/x86/kernel/head32.c | 2 +- arch/x86/kernel/head64.c | 2 +- arch/x86/kernel/setup.c | 39 ++++++++++++++++++++++++++++++++++----- arch/x86/kernel/vmlinux_32.lds.S | 7 +++++++ arch/x86/kernel/vmlinux_64.lds.S | 5 +++++ arch/x86/mm/pageattr.c | 5 +++-- arch/x86/xen/mmu.c | 6 +++--- 9 files changed, 65 insertions(+), 12 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 2b8c5160388f..1b7ee5d673c2 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -1 +1,8 @@ +#ifndef _ASM_X86_SECTIONS_H +#define _ASM_X86_SECTIONS_H + #include <asm-generic/sections.h> + +extern char __brk_base[], __brk_limit[]; + +#endif /* _ASM_X86_SECTIONS_H */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 05c6f6b11fd5..45454f3fa121 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -100,6 +100,10 @@ extern struct boot_params boot_params; */ #define LOWMEMSIZE() (0x9f000) +/* exceedingly early brk-like allocator */ +extern unsigned long _brk_end; +void *extend_brk(size_t size, size_t align); + #ifdef __i386__ void __init i386_start_kernel(void); diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index ac108d1fe182..29f1095b0849 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -18,7 +18,7 @@ void __init i386_start_kernel(void) { reserve_trampoline_memory(); - reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); + reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index f5b272247690..70eaa852c732 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -100,7 +100,7 @@ void __init x86_64_start_reservations(char *real_mode_data) reserve_trampoline_memory(); - reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); + reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f28c56e6bf94..e6b742d2a3f5 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -114,6 +114,9 @@ unsigned int boot_cpu_id __read_mostly; +static __initdata unsigned long _brk_start = (unsigned long)__brk_base; +unsigned long _brk_end = (unsigned long)__brk_base; + #ifdef CONFIG_X86_64 int default_cpu_present_to_apicid(int mps_cpu) { @@ -337,6 
+340,34 @@ static void __init relocate_initrd(void) } #endif +void * __init extend_brk(size_t size, size_t align) +{ + size_t mask = align - 1; + void *ret; + + BUG_ON(_brk_start == 0); + BUG_ON(align & mask); + + _brk_end = (_brk_end + mask) & ~mask; + BUG_ON((char *)(_brk_end + size) > __brk_limit); + + ret = (void *)_brk_end; + _brk_end += size; + + memset(ret, 0, size); + + return ret; +} + +static void __init reserve_brk(void) +{ + if (_brk_end > _brk_start) + reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK"); + + /* Mark brk area as locked down and no longer taking any new allocations */ + _brk_start = 0; +} + static void __init reserve_initrd(void) { u64 ramdisk_image = boot_params.hdr.ramdisk_image; @@ -717,11 +748,7 @@ void __init setup_arch(char **cmdline_p) init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; -#ifdef CONFIG_X86_32 - init_mm.brk = init_pg_tables_end + PAGE_OFFSET; -#else - init_mm.brk = (unsigned long) &_end; -#endif + init_mm.brk = _brk_end; code_resource.start = virt_to_phys(_text); code_resource.end = virt_to_phys(_etext)-1; @@ -842,6 +869,8 @@ void __init setup_arch(char **cmdline_p) setup_bios_corruption_check(); #endif + reserve_brk(); + /* max_pfn_mapped is updated here */ max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -16,6 +16,7 @@ #include <asm/processor.h> #include <asm/tlbflush.h> #include <asm/sections.h> +#include <asm/setup.h> #include <asm/uaccess.h> #include <asm/pgalloc.h> #include <asm/proto.h> @@ -95,7 +96,7 @@ static inline unsigned long highmap_start_pfn(void) static inline unsigned long highmap_end_pfn(void) { - return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; + return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; } #endif @@ -711,7 +712,7 @@ static int cpa_process_alias(struct cpa_data *cpa) * No need to redo, when the primary call touched the high * mapping already: */ - if (within(vaddr, (unsigned long) _text, (unsigned long) _end)) + if (within(vaddr, (unsigned long) _text, _brk_end)) return 0; /* diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index cb6afa4ec95c..72f6a76dbfb9 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1723,9 +1723,9 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, { pmd_t *kernel_pmd; - init_pg_tables_start = __pa(pgd); - init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; - max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); + max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + + xen_start_info->nr_pt_frames * PAGE_SIZE + + 512*1024); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); -- cgit v1.2.3-70-g09d2 From 0920dce7d5889634faa336f65833ee44f3b7651e Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 16 Mar 2009 00:15:18 +0900 Subject: x86, mm: remove unnecessary include file from iomap_32.c The asm/highmem.h inclusion was added so that iomap_32.c could use kmap_atomic_prot_pfn(), by commit bb6d59ca927d855ffac567b35c0a790c67016103. kmap_atomic_prot_pfn() has since been moved to iomap_32.c by commit dd63fdcc63f0f853b116b52e56200a0e0227cf5f, so the asm/highmem.h inclusion in iomap_32.c is now unnecessary. 
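For reference, the kind of consumer these iomap_32.c helpers serve (modelled loosely on the i915 GEM use mentioned earlier) might look like this sketch; the pfn and value are illustrative assumptions:

/*
 * Sketch: atomically map one page of a PCI aperture write-combined,
 * store to it, and unmap.  iounmap_atomic() pairs with the mapping
 * call in the km_type-based API of this era.
 */
#include <linux/types.h>
#include <asm/iomap.h>
#include <asm/pgtable.h>

static void poke_aperture_page(unsigned long aperture_pfn, u32 val)
{
	void *va = iomap_atomic_prot_pfn(aperture_pfn, KM_USER0,
					 PAGE_KERNEL_WC);

	*(volatile u32 *)va = val;	/* reaches the device via WC */
	iounmap_atomic(va, KM_USER0);
}
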
Signed-off-by: Akinobu Mita LKML-Reference: <20090315151517.GA29074@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/mm/iomap_32.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 6e60ba698cee..699c9b2895ae 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -18,7 +18,6 @@ #include <asm/iomap.h> #include <asm/pat.h> -#include <asm/highmem.h> #include <linux/module.h> int is_io_mapping_possible(resource_size_t base, unsigned long size) -- cgit v1.2.3-70-g09d2 From ce4e240c279a31096f74afa6584a62d64a1ba8c8 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 17 Mar 2009 10:16:54 -0800 Subject: x86: add x2apic_wrmsr_fence() to x2apic flush tlb paths Impact: optimize APIC IPI related barriers Uncached MMIO accesses for xapic are inherently serializing and hence we don't need explicit barriers for xapic IPI paths. x2apic MSR writes/reads don't have serializing semantics and hence need a serializing instruction or mfence to make all previous memory stores globally visible before the x2apic MSR write for the IPI. Add x2apic_wrmsr_fence() to the x2apic-specific IPI paths, and drop the now-redundant explicit barrier from the generic flush-TLB path. Signed-off-by: Suresh Siddha Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Jens Axboe Cc: Linus Torvalds Cc: "Paul E. McKenney" Cc: Rusty Russell Cc: Steven Rostedt Cc: "steiner@sgi.com" Cc: Nick Piggin LKML-Reference: <1237313814.27006.203.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 10 ++++++++++ arch/x86/kernel/apic/x2apic_cluster.c | 6 ++++++ arch/x86/kernel/apic/x2apic_phys.c | 6 ++++++ arch/x86/mm/tlb.c | 5 ----- 4 files changed, 22 insertions(+), 5 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 6d5b6f0900e1..00f5962d82d0 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -108,6 +108,16 @@ extern void native_apic_icr_write(u32 low, u32 id); extern u64 native_apic_icr_read(void); #ifdef CONFIG_X86_X2APIC +/* + * Make previous memory operations globally visible before + * sending the IPI through x2apic wrmsr. We need a serializing instruction or + * mfence for this. 
+ */ +static inline void x2apic_wrmsr_fence(void) +{ + asm volatile("mfence" : : : "memory"); +} + static inline void native_apic_msr_write(u32 reg, u32 v) { if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 8fb87b6dd633..4a903e2f0d17 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -57,6 +57,8 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) unsigned long query_cpu; unsigned long flags; + x2apic_wrmsr_fence(); + local_irq_save(flags); for_each_cpu(query_cpu, mask) { __x2apic_send_IPI_dest( @@ -73,6 +75,8 @@ static void unsigned long query_cpu; unsigned long flags; + x2apic_wrmsr_fence(); + local_irq_save(flags); for_each_cpu(query_cpu, mask) { if (query_cpu == this_cpu) @@ -90,6 +94,8 @@ static void x2apic_send_IPI_allbutself(int vector) unsigned long query_cpu; unsigned long flags; + x2apic_wrmsr_fence(); + local_irq_save(flags); for_each_online_cpu(query_cpu) { if (query_cpu == this_cpu) diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 23625b9f98b2..a284359627e7 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -58,6 +58,8 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) unsigned long query_cpu; unsigned long flags; + x2apic_wrmsr_fence(); + local_irq_save(flags); for_each_cpu(query_cpu, mask) { __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), @@ -73,6 +75,8 @@ static void unsigned long query_cpu; unsigned long flags; + x2apic_wrmsr_fence(); + local_irq_save(flags); for_each_cpu(query_cpu, mask) { if (query_cpu != this_cpu) @@ -89,6 +93,8 @@ static void x2apic_send_IPI_allbutself(int vector) unsigned long query_cpu; unsigned long flags; + x2apic_wrmsr_fence(); + local_irq_save(flags); for_each_online_cpu(query_cpu) { if (query_cpu == this_cpu) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index a654d59e4483..821e97017e95 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -186,11 +186,6 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id())); - /* - * Make the above memory operations globally visible before - * sending the IPI. - */ - smp_mb(); /* * We have to send the IPI only to * CPUs affected. -- cgit v1.2.3-70-g09d2 From b40c757964bbad76ecfa88eda9eb0b4d76dd8b40 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 18 Mar 2009 13:03:32 -0700 Subject: x86/32: no need to use set_pte_present in set_pte_vaddr Impact: cleanup, remove last user of set_pte_present set_pte_vaddr() is only used to install ptes in fixmaps, and should never be used to overwrite a present mapping. 
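A sketch of the kind of caller affected, essentially what installing a fixmap pte boils down to on 32-bit; the slot, pfn and protections are illustrative assumptions:

/*
 * Install a pte for a fixmap slot.  After this patch, set_pte_vaddr()
 * ends up using plain set_pte_at(), since a fixmap slot is never
 * expected to be present when it is (re)mapped.
 */
#include <asm/fixmap.h>
#include <asm/pgtable.h>

static void map_fixmap_slot(enum fixed_addresses idx, unsigned long pfn,
			    pgprot_t prot)
{
	set_pte_vaddr(__fix_to_virt(idx), pfn_pte(pfn, prot));
}
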
Signed-off-by: Jeremy Fitzhardinge Cc: Xen-devel LKML-Reference: <1237406613-2929-1-git-send-email-jeremy@goop.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/pgtable_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index f2e477c91c1b..46c8834aedc0 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -50,7 +50,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval) } pte = pte_offset_kernel(pmd, vaddr); if (pte_val(pteval)) - set_pte_present(&init_mm, vaddr, pte, pteval); + set_pte_at(&init_mm, vaddr, pte, pteval); else pte_clear(&init_mm, vaddr, pte); -- cgit v1.2.3-70-g09d2 From 728c9518873de0bbb92b66daa1943b12e5b9f80f Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Thu, 19 Mar 2009 14:51:13 -0700 Subject: x86, CPA: Add a flag parameter to cpa set_clr() Change the change_page_attr_set_clr() array parameter to a flag. This helps the following patches, which add an interface to change the attribute to UC/WB over a set of pages referred to by struct page pointers. Signed-off-by: Venkatesh Pallipadi Cc: arjan@infradead.org Cc: eric@anholt.net Cc: Venkatesh Pallipadi Cc: airlied@redhat.com LKML-Reference: <20090319215358.611346000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 1280565670e4..69009afa98c0 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -787,7 +787,7 @@ static inline int cache_attr(pgprot_t attr) static int change_page_attr_set_clr(unsigned long *addr, int numpages, pgprot_t mask_set, pgprot_t mask_clr, - int force_split, int array) + int force_split, int in_flag) { struct cpa_data cpa; int ret, cache, checkalias; @@ -802,7 +802,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, return 0; /* Ensure we are PAGE_SIZE aligned */ - if (!array) { + if (!(in_flag & CPA_ARRAY)) { if (*addr & ~PAGE_MASK) { *addr &= PAGE_MASK; /* @@ -840,7 +840,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, cpa.curpage = 0; cpa.force_split = force_split; - if (array) + if (in_flag & CPA_ARRAY) cpa.flags |= CPA_ARRAY; /* No alias checking for _NX bit modifications */ @@ -889,14 +889,14 @@ static inline int change_page_attr_set(unsigned long *addr, int numpages, pgprot_t mask, int array) { return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0, - array); + (array ? CPA_ARRAY : 0)); } static inline int change_page_attr_clear(unsigned long *addr, int numpages, pgprot_t mask, int array) { return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0, - array); + (array ? CPA_ARRAY : 0)); } int _set_memory_uc(unsigned long addr, int numpages) -- cgit v1.2.3-70-g09d2 From 9ae2847591c857bed44bc094b908b412bfa1b244 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Thu, 19 Mar 2009 14:51:14 -0700 Subject: x86, PAT: Add support for struct page pointer array in cpa set_clr Add a struct page array pointer to the cpa struct, along with a CPA_PAGES_ARRAY flag. With that, change_page_attr_set_clr() gains a parameter to pass a struct page array pointer, which can be handled by the underlying cpa code. cpa_flush_array() is also changed to support either an address array or a struct page pointer array, depending on the flag. 
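Sketched as a hypothetical internal caller, the new mode looks like this; it mirrors the cpa_set_pages_array() helper added by the follow-on patch below:

/* Make an array of pages UC_MINUS via the struct-page-array mode:
 * a NULL address array plus the CPA_PAGES_ARRAY flag tells the cpa
 * core to take addresses from 'pages' instead. */
static int example_pages_to_uc(struct page **pages, int numpages)
{
	return change_page_attr_set_clr(NULL, numpages,
					__pgprot(_PAGE_CACHE_UC_MINUS),
					__pgprot(0), 0,
					CPA_PAGES_ARRAY, pages);
}
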
Signed-off-by: Venkatesh Pallipadi Cc: arjan@infradead.org Cc: eric@anholt.net Cc: airlied@redhat.com LKML-Reference: <20090319215358.758513000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 79 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 29 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 69009afa98c0..e5c257fb41e2 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -34,6 +34,7 @@ struct cpa_data { unsigned long pfn; unsigned force_split : 1; int curpage; + struct page **pages; }; /* @@ -46,6 +47,7 @@ static DEFINE_SPINLOCK(cpa_lock); #define CPA_FLUSHTLB 1 #define CPA_ARRAY 2 +#define CPA_PAGES_ARRAY 4 #ifdef CONFIG_PROC_FS static unsigned long direct_pages_count[PG_LEVEL_NUM]; @@ -202,10 +204,10 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache) } } -static void cpa_flush_array(unsigned long *start, int numpages, int cache) +static void cpa_flush_array(unsigned long *start, int numpages, int cache, + int in_flags, struct page **pages) { unsigned int i, level; - unsigned long *addr; BUG_ON(irqs_disabled()); @@ -226,14 +228,22 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache) * will cause all other CPUs to flush the same * cachelines: */ - for (i = 0, addr = start; i < numpages; i++, addr++) { - pte_t *pte = lookup_address(*addr, &level); + for (i = 0; i < numpages; i++) { + unsigned long addr; + pte_t *pte; + + if (in_flags & CPA_PAGES_ARRAY) + addr = (unsigned long)page_address(pages[i]); + else + addr = start[i]; + + pte = lookup_address(addr, &level); /* * Only flush present addresses: */ if (pte && (pte_val(*pte) & _PAGE_PRESENT)) - clflush_cache_range((void *) *addr, PAGE_SIZE); + clflush_cache_range((void *)addr, PAGE_SIZE); } } @@ -585,7 +595,9 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) unsigned int level; pte_t *kpte, old_pte; - if (cpa->flags & CPA_ARRAY) + if (cpa->flags & CPA_PAGES_ARRAY) + address = (unsigned long)page_address(cpa->pages[cpa->curpage]); + else if (cpa->flags & CPA_ARRAY) address = cpa->vaddr[cpa->curpage]; else address = *cpa->vaddr; @@ -688,7 +700,9 @@ static int cpa_process_alias(struct cpa_data *cpa) * No need to redo, when the primary call touched the direct * mapping already: */ - if (cpa->flags & CPA_ARRAY) + if (cpa->flags & CPA_PAGES_ARRAY) + vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]); + else if (cpa->flags & CPA_ARRAY) vaddr = cpa->vaddr[cpa->curpage]; else vaddr = *cpa->vaddr; @@ -699,7 +713,7 @@ static int cpa_process_alias(struct cpa_data *cpa) alias_cpa = *cpa; temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); alias_cpa.vaddr = &temp_cpa_vaddr; - alias_cpa.flags &= ~CPA_ARRAY; + alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); ret = __change_page_attr_set_clr(&alias_cpa, 0); @@ -725,7 +739,7 @@ static int cpa_process_alias(struct cpa_data *cpa) alias_cpa = *cpa; temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; alias_cpa.vaddr = &temp_cpa_vaddr; - alias_cpa.flags &= ~CPA_ARRAY; + alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); /* * The high mapping range is imprecise, so ignore the return value. 
@@ -746,7 +760,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) */ cpa->numpages = numpages; /* for array changes, we can't use large page */ - if (cpa->flags & CPA_ARRAY) + if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY)) cpa->numpages = 1; if (!debug_pagealloc) @@ -770,7 +784,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) */ BUG_ON(cpa->numpages > numpages); numpages -= cpa->numpages; - if (cpa->flags & CPA_ARRAY) + if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) cpa->curpage++; else *cpa->vaddr += cpa->numpages * PAGE_SIZE; @@ -787,7 +801,8 @@ static inline int cache_attr(pgprot_t attr) static int change_page_attr_set_clr(unsigned long *addr, int numpages, pgprot_t mask_set, pgprot_t mask_clr, - int force_split, int in_flag) + int force_split, int in_flag, + struct page **pages) { struct cpa_data cpa; int ret, cache, checkalias; @@ -802,15 +817,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, return 0; /* Ensure we are PAGE_SIZE aligned */ - if (!(in_flag & CPA_ARRAY)) { - if (*addr & ~PAGE_MASK) { - *addr &= PAGE_MASK; - /* - * People should not be passing in unaligned addresses: - */ - WARN_ON_ONCE(1); - } - } else { + if (in_flag & CPA_ARRAY) { int i; for (i = 0; i < numpages; i++) { if (addr[i] & ~PAGE_MASK) { @@ -818,6 +825,18 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, WARN_ON_ONCE(1); } } + } else if (!(in_flag & CPA_PAGES_ARRAY)) { + /* + * in_flag of CPA_PAGES_ARRAY implies it is aligned. + * No need to check in that case + */ + if (*addr & ~PAGE_MASK) { + *addr &= PAGE_MASK; + /* + * People should not be passing in unaligned addresses: + */ + WARN_ON_ONCE(1); + } } /* Must avoid aliasing mappings in the highmem code */ @@ -833,6 +852,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, arch_flush_lazy_mmu_mode(); cpa.vaddr = addr; + cpa.pages = pages; cpa.numpages = numpages; cpa.mask_set = mask_set; cpa.mask_clr = mask_clr; @@ -840,8 +860,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, cpa.curpage = 0; cpa.force_split = force_split; - if (in_flag & CPA_ARRAY) - cpa.flags |= CPA_ARRAY; + if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY)) + cpa.flags |= in_flag; /* No alias checking for _NX bit modifications */ checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; @@ -867,9 +887,10 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, * wbindv): */ if (!ret && cpu_has_clflush) { - if (cpa.flags & CPA_ARRAY) - cpa_flush_array(addr, numpages, cache); - else + if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { + cpa_flush_array(addr, numpages, cache, + cpa.flags, pages); + } else cpa_flush_range(*addr, numpages, cache); } else cpa_flush_all(cache); @@ -889,14 +910,14 @@ static inline int change_page_attr_set(unsigned long *addr, int numpages, pgprot_t mask, int array) { return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0, - (array ? CPA_ARRAY : 0)); + (array ? CPA_ARRAY : 0), NULL); } static inline int change_page_attr_clear(unsigned long *addr, int numpages, pgprot_t mask, int array) { return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0, - (array ? 
CPA_ARRAY : 0), NULL); } int _set_memory_uc(unsigned long addr, int numpages) { @@ -1044,7 +1065,7 @@ int set_memory_np(unsigned long addr, int numpages) int set_memory_4k(unsigned long addr, int numpages) { return change_page_attr_set_clr(&addr, numpages, __pgprot(0), - __pgprot(0), 1, 0); + __pgprot(0), 1, 0, NULL); } int set_pages_uc(struct page *page, int numpages) -- cgit v1.2.3-70-g09d2 From 0f3507555f6fa4acbc85a646d6e8766230db38fc Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Thu, 19 Mar 2009 14:51:15 -0700 Subject: x86, CPA: Add set_pages_array_uc and set_pages_array_wb Add new interfaces: set_pages_array_uc() set_pages_array_wb() that can be used to change the page attribute for a bunch of pages, with the flush etc. done once at the end of all the changes. These interfaces are similar to the existing set_memory_array_uc() and set_memory_array_wb(). Signed-off-by: Venkatesh Pallipadi Cc: arjan@infradead.org Cc: eric@anholt.net Cc: airlied@redhat.com LKML-Reference: <20090319215358.901545000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cacheflush.h | 3 ++ arch/x86/mm/pageattr.c | 63 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 5b301b7ff5f4..b3894bf52fcd 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -90,6 +90,9 @@ int set_memory_4k(unsigned long addr, int numpages); int set_memory_array_uc(unsigned long *addr, int addrinarray); int set_memory_array_wb(unsigned long *addr, int addrinarray); +int set_pages_array_uc(struct page **pages, int addrinarray); +int set_pages_array_wb(struct page **pages, int addrinarray); + /* * For legacy compatibility with the old APIs, a few functions * are provided that work on a "struct page". diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e5c257fb41e2..d71e1b636ce6 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -920,6 +920,20 @@ static inline int change_page_attr_clear(unsigned long *addr, int numpages, (array ? 
CPA_ARRAY : 0), NULL); } +static inline int cpa_set_pages_array(struct page **pages, int numpages, + pgprot_t mask) +{ + return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0, + CPA_PAGES_ARRAY, pages); +} + +static inline int cpa_clear_pages_array(struct page **pages, int numpages, + pgprot_t mask) +{ + return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0, + CPA_PAGES_ARRAY, pages); +} + int _set_memory_uc(unsigned long addr, int numpages) { /* @@ -1076,6 +1090,35 @@ int set_pages_uc(struct page *page, int numpages) } EXPORT_SYMBOL(set_pages_uc); +int set_pages_array_uc(struct page **pages, int addrinarray) +{ + unsigned long start; + unsigned long end; + int i; + int free_idx; + + for (i = 0; i < addrinarray; i++) { + start = (unsigned long)page_address(pages[i]); + end = start + PAGE_SIZE; + if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) + goto err_out; + } + + if (cpa_set_pages_array(pages, addrinarray, + __pgprot(_PAGE_CACHE_UC_MINUS)) == 0) { + return 0; /* Success */ + } +err_out: + free_idx = i; + for (i = 0; i < free_idx; i++) { + start = (unsigned long)page_address(pages[i]); + end = start + PAGE_SIZE; + free_memtype(start, end); + } + return -EINVAL; +} +EXPORT_SYMBOL(set_pages_array_uc); + int set_pages_wb(struct page *page, int numpages) { unsigned long addr = (unsigned long)page_address(page); @@ -1084,6 +1127,26 @@ int set_pages_wb(struct page *page, int numpages) } EXPORT_SYMBOL(set_pages_wb); +int set_pages_array_wb(struct page **pages, int addrinarray) +{ + int retval; + unsigned long start; + unsigned long end; + int i; + + retval = cpa_clear_pages_array(pages, addrinarray, + __pgprot(_PAGE_CACHE_MASK)); + + for (i = 0; i < addrinarray; i++) { + start = (unsigned long)page_address(pages[i]); + end = start + PAGE_SIZE; + free_memtype(start, end); + } + + return retval; +} +EXPORT_SYMBOL(set_pages_array_wb); + int set_pages_x(struct page *page, int numpages) { unsigned long addr = (unsigned long)page_address(page); -- cgit v1.2.3-70-g09d2 From 45c7b28f3c7e3a45cc5a597cc19816a9015ee8ae Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 20 Mar 2009 17:53:34 -0700 Subject: Revert "x86: create a non-zero sized bm_pte only when needed" This reverts commit 698609bdcd35d0641f4c6622c83680ab1a6d67cb. 698609b breaks Xen booting, as it relies on head*.S to set up the fixmap pagetables (as a side-effect of initializing the USB debug port). Xen, however, does not boot via head*.S, and so the fixmap area is not initialized. The specific symptom of the crash is a fault in dmi_scan(), because the pointer that early_ioremap returns is not actually present. Signed-off-by: Jeremy Fitzhardinge Cc: Jan Beulich LKML-Reference: <49C43A8E.5090203@goop.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 55e127f71ed9..83ed74affba9 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -487,12 +487,7 @@ static int __init early_ioremap_debug_setup(char *str) early_param("early_ioremap_debug", early_ioremap_debug_setup); static __initdata int after_paging_init; -#define __FIXADDR_TOP (-PAGE_SIZE) -static pte_t bm_pte[(__fix_to_virt(FIX_DBGP_BASE) - ^ __fix_to_virt(FIX_BTMAP_BEGIN)) >> PMD_SHIFT - ? 
PAGE_SIZE / sizeof(pte_t) : 0] __page_aligned_bss; -#undef __FIXADDR_TOP -static __initdata pte_t *bm_ptep; +static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) { @@ -507,8 +502,6 @@ static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) static inline pte_t * __init early_ioremap_pte(unsigned long addr) { - if (!sizeof(bm_pte)) - return &bm_ptep[pte_index(addr)]; return &bm_pte[pte_index(addr)]; } @@ -526,14 +519,8 @@ void __init early_ioremap_init(void) slot_virt[i] = fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); - if (sizeof(bm_pte)) { - memset(bm_pte, 0, sizeof(bm_pte)); - pmd_populate_kernel(&init_mm, pmd, bm_pte); - } else { - bm_ptep = pte_offset_kernel(pmd, 0); - if (early_ioremap_debug) - printk(KERN_INFO "bm_ptep=%p\n", bm_ptep); - } + memset(bm_pte, 0, sizeof(bm_pte)); + pmd_populate_kernel(&init_mm, pmd, bm_pte); /* * The boot-ioremap range spans multiple pmds, for which -- cgit v1.2.3-70-g09d2 From 9f4f25c86ff2233dd98d4bd6968afb1ca66558a0 Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Wed, 25 Mar 2009 14:07:11 +0100 Subject: x86: early_ioremap_init(), use __fix_to_virt(), because we are sure it's safe Tetsuo Handa reported this link bug: | arch/x86/mm/built-in.o(.init.text+0x1831): In function `early_ioremap_init': | : undefined reference to `__this_fixmap_does_not_exist' | make: *** [.tmp_vmlinux1] Error 1 Commit 8827247ffcc9e880cbe4705655065cf011265157 used a variable (which would be optimized to a constant) as fix_to_virt()'s parameter. It depends on gcc's optimization and fails with old gcc. (Tetsuo used gcc 3.3.) We can use __fix_to_virt() instead, because we know it's safe and don't need the link-time error reporting. Reported-by: Tetsuo Handa Signed-off-by: Wang Chen Cc: sfr@canb.auug.org.au LKML-Reference: <49C9FFEA.7060908@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 83ed74affba9..0dfa09d69e80 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -516,7 +516,7 @@ void __init early_ioremap_init(void) printk(KERN_INFO "early_ioremap_init()\n"); for (i = 0; i < FIX_BTMAPS_SLOTS; i++) - slot_virt[i] = fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); + slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); memset(bm_pte, 0, sizeof(bm_pte)); -- cgit v1.2.3-70-g09d2
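The distinction the fix relies on, in a standalone sketch; the index computation mirrors the patched code, while btmap_slot_virt() is an illustrative name:

/*
 * fix_to_virt() is a wrapper that triggers a link-time error
 * (__this_fixmap_does_not_exist) unless the compiler can prove its
 * argument is a valid constant; __fix_to_virt() is the raw address
 * computation and works for any index the caller already knows to
 * be valid.
 */
#include <asm/fixmap.h>

static unsigned long btmap_slot_virt(int i)	/* i is a runtime value */
{
	return __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS * i);
}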