diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-05 09:45:46 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-05 09:45:46 -0700 |
commit | 9e85ae6af6e907975f68d82ff127073ec024cb05 (patch) | |
tree | 3d3349b03da858e53ef8f8dce467e4a691eabf88 | |
parent | 6caffe21ddeaae4a9d18d46eed2445a8d269a1fe (diff) | |
parent | fa41ba0d08de7c975c3e94d0067553f9b934221f (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 updates from Martin Schwidefsky:
"The first part of the s390 updates for 4.14:
- Add machine type 0x3906 for IBM z14
- Add IBM z14 TLB flushing improvements for KVM guests
- Exploit the TOD clock epoch extension to provide a continuous TOD
clock after 2042/09/17
- Add NIAI spinlock hints for IBM z14
- Rework the vmcp driver and use CMA for the response buffer of z/VM
CP commands
- Drop some s390 specific asm headers and use the generic version
- Add block discard for DASD-FBA devices under z/VM
- Add average request times to DASD statistics
- A few of those constify patches which seem to be in vogue right now
- Cleanup and bug fixes"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (50 commits)
s390/mm: avoid empty zero pages for KVM guests to avoid postcopy hangs
s390/dasd: Add discard support for FBA devices
s390/zcrypt: make CPRBX const
s390/uaccess: avoid mvcos jump label
s390/mm: use generic mm_hooks
s390/facilities: fix typo
s390/vmcp: simplify vmcp_response_free()
s390/topology: Remove the unused parent_node() macro
s390/dasd: Change unsigned long long to unsigned long
s390/smp: convert cpuhp_setup_state() return code to zero on success
s390: fix 'novx' early parameter handling
s390/dasd: add average request times to dasd statistics
s390/scm: use common completion path
s390/pci: log changes to uid checking
s390/vmcp: simplify vmcp_ioctl()
s390/vmcp: return -ENOTTY for unknown ioctl commands
s390/vmcp: split vmcp header file and move to uapi
s390/vmcp: make use of contiguous memory allocator
s390/cpcmd,vmcp: avoid GFP_DMA allocations
s390/vmcp: fix uaccess check and avoid undefined behavior
...
87 files changed, 1405 insertions, 694 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 591d48f3a7de..6996b7727b85 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4395,6 +4395,10 @@ decrease the size and leave more room for directly mapped kernel RAM. + vmcp_cma=nn[MG] [KNL,S390] + Sets the memory size reserved for contiguous memory + allocations for the vmcp device driver. + vmhalt= [KNL,S390] Perform z/VM CP command after system halt. Format: <command> diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 7eeb75d758c1..48af970320cb 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -222,6 +222,10 @@ config HAVE_MARCH_Z13_FEATURES def_bool n select HAVE_MARCH_ZEC12_FEATURES +config HAVE_MARCH_Z14_FEATURES + def_bool n + select HAVE_MARCH_Z13_FEATURES + choice prompt "Processor type" default MARCH_Z196 @@ -282,6 +286,14 @@ config MARCH_Z13 2964 series). The kernel will be slightly faster but will not work on older machines. +config MARCH_Z14 + bool "IBM z14" + select HAVE_MARCH_Z14_FEATURES + help + Select this to enable optimizations for IBM z14 (3906 series). + The kernel will be slightly faster but will not work on older + machines. 
+ endchoice config MARCH_Z900_TUNE @@ -305,6 +317,9 @@ config MARCH_ZEC12_TUNE config MARCH_Z13_TUNE def_bool TUNE_Z13 || MARCH_Z13 && TUNE_DEFAULT +config MARCH_Z14_TUNE + def_bool TUNE_Z14 || MARCH_Z14 && TUNE_DEFAULT + choice prompt "Tune code generation" default TUNE_DEFAULT @@ -343,6 +358,9 @@ config TUNE_ZEC12 config TUNE_Z13 bool "IBM z13" +config TUNE_Z14 + bool "IBM z14" + endchoice config 64BIT diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 54e00526b8df..dac821cfcd43 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -31,7 +31,8 @@ mflags-$(CONFIG_MARCH_Z9_109) := -march=z9-109 mflags-$(CONFIG_MARCH_Z10) := -march=z10 mflags-$(CONFIG_MARCH_Z196) := -march=z196 mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12 -mflags-$(CONFIG_MARCH_Z13) := -march=z13 +mflags-$(CONFIG_MARCH_Z13) := -march=z13 +mflags-$(CONFIG_MARCH_Z14) := -march=z14 export CC_FLAGS_MARCH := $(mflags-y) @@ -44,7 +45,8 @@ cflags-$(CONFIG_MARCH_Z9_109_TUNE) += -mtune=z9-109 cflags-$(CONFIG_MARCH_Z10_TUNE) += -mtune=z10 cflags-$(CONFIG_MARCH_Z196_TUNE) += -mtune=z196 cflags-$(CONFIG_MARCH_ZEC12_TUNE) += -mtune=zEC12 -cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13 +cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13 +cflags-$(CONFIG_MARCH_Z14_TUNE) += -mtune=z14 cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index b3c88479feba..6e2c9f7e47fa 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -16,4 +16,5 @@ generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += preempt.h generic-y += trace_clock.h +generic-y += unaligned.h generic-y += word-at-a-time.h diff --git a/arch/s390/include/asm/cpcmd.h b/arch/s390/include/asm/cpcmd.h index 3dfadb5d648f..ca2b0624ad46 100644 --- a/arch/s390/include/asm/cpcmd.h +++ b/arch/s390/include/asm/cpcmd.h @@ -10,9 +10,8 @@ /* * the lowlevel function for cpcmd - * the caller of __cpcmd has to ensure that the response buffer is below 2 GB 
*/ -extern int __cpcmd(const char *cmd, char *response, int rlen, int *response_code); +int __cpcmd(const char *cmd, char *response, int rlen, int *response_code); /* * cpcmd is the in-kernel interface for issuing CP commands @@ -25,8 +24,8 @@ extern int __cpcmd(const char *cmd, char *response, int rlen, int *response_code * response_code: return pointer for VM's error code * return value: the size of the response. The caller can check if the buffer * was large enough by comparing the return value and rlen - * NOTE: If the response buffer is not below 2 GB, cpcmd can sleep + * NOTE: If the response buffer is not in real storage, cpcmd can sleep */ -extern int cpcmd(const char *cmd, char *response, int rlen, int *response_code); +int cpcmd(const char *cmd, char *response, int rlen, int *response_code); #endif /* _ASM_S390_CPCMD_H */ diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h index c5befc5a3bf5..b71735eab23f 100644 --- a/arch/s390/include/asm/ebcdic.h +++ b/arch/s390/include/asm/ebcdic.h @@ -9,9 +9,7 @@ #ifndef _EBCDIC_H #define _EBCDIC_H -#ifndef _S390_TYPES_H -#include <types.h> -#endif +#include <linux/types.h> extern __u8 _ascebc_500[256]; /* ASCII -> EBCDIC 500 conversion table */ extern __u8 _ebcasc_500[256]; /* EBCDIC 500 -> ASCII conversion table */ diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index c92ed0170be2..65998a1f5d43 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -191,7 +191,7 @@ struct arch_elf_state { } while (0) #define CORE_DUMP_USE_REGSET -#define ELF_EXEC_PAGESIZE 4096 +#define ELF_EXEC_PAGESIZE PAGE_SIZE /* * This is the base location for PIE (ET_DYN with INTERP) loads. 
On diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index edb5161df7e2..6810bd757312 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -81,7 +81,7 @@ struct ipl_parameter_block { struct ipl_block_fcp fcp; struct ipl_block_ccw ccw; } ipl_info; -} __attribute__((packed,aligned(4096))); +} __packed __aligned(PAGE_SIZE); /* * IPL validity flags diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 8a5b082797f8..a6870ea6ea8b 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -95,46 +95,46 @@ struct lowcore { __u64 int_clock; /* 0x0310 */ __u64 mcck_clock; /* 0x0318 */ __u64 clock_comparator; /* 0x0320 */ + __u64 boot_clock[2]; /* 0x0328 */ /* Current process. */ - __u64 current_task; /* 0x0328 */ - __u8 pad_0x318[0x320-0x318]; /* 0x0330 */ - __u64 kernel_stack; /* 0x0338 */ + __u64 current_task; /* 0x0338 */ + __u64 kernel_stack; /* 0x0340 */ /* Interrupt, panic and restart stack. */ - __u64 async_stack; /* 0x0340 */ - __u64 panic_stack; /* 0x0348 */ - __u64 restart_stack; /* 0x0350 */ + __u64 async_stack; /* 0x0348 */ + __u64 panic_stack; /* 0x0350 */ + __u64 restart_stack; /* 0x0358 */ /* Restart function and parameter. */ - __u64 restart_fn; /* 0x0358 */ - __u64 restart_data; /* 0x0360 */ - __u64 restart_source; /* 0x0368 */ + __u64 restart_fn; /* 0x0360 */ + __u64 restart_data; /* 0x0368 */ + __u64 restart_source; /* 0x0370 */ /* Address space pointer. */ - __u64 kernel_asce; /* 0x0370 */ - __u64 user_asce; /* 0x0378 */ + __u64 kernel_asce; /* 0x0378 */ + __u64 user_asce; /* 0x0380 */ /* * The lpp and current_pid fields form a * 64-bit value that is set as program * parameter with the LPP instruction. 
*/ - __u32 lpp; /* 0x0380 */ - __u32 current_pid; /* 0x0384 */ + __u32 lpp; /* 0x0388 */ + __u32 current_pid; /* 0x038c */ /* SMP info area */ - __u32 cpu_nr; /* 0x0388 */ - __u32 softirq_pending; /* 0x038c */ - __u64 percpu_offset; /* 0x0390 */ - __u64 vdso_per_cpu_data; /* 0x0398 */ - __u64 machine_flags; /* 0x03a0 */ - __u32 preempt_count; /* 0x03a8 */ - __u8 pad_0x03ac[0x03b0-0x03ac]; /* 0x03ac */ - __u64 gmap; /* 0x03b0 */ - __u32 spinlock_lockval; /* 0x03b8 */ - __u32 fpu_flags; /* 0x03bc */ - __u8 pad_0x03c0[0x0400-0x03c0]; /* 0x03c0 */ + __u32 cpu_nr; /* 0x0390 */ + __u32 softirq_pending; /* 0x0394 */ + __u64 percpu_offset; /* 0x0398 */ + __u64 vdso_per_cpu_data; /* 0x03a0 */ + __u64 machine_flags; /* 0x03a8 */ + __u32 preempt_count; /* 0x03b0 */ + __u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */ + __u64 gmap; /* 0x03b8 */ + __u32 spinlock_lockval; /* 0x03c0 */ + __u32 fpu_flags; /* 0x03c4 */ + __u8 pad_0x03c8[0x0400-0x03c8]; /* 0x03c8 */ /* Per cpu primary space access list */ __u32 paste[16]; /* 0x0400 */ diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h deleted file mode 100644 index b79813d9cf68..000000000000 --- a/arch/s390/include/asm/mman.h +++ /dev/null @@ -1,11 +0,0 @@ -/* - * S390 version - * - * Derived from "include/asm-i386/mman.h" - */ -#ifndef __S390_MMAN_H__ -#define __S390_MMAN_H__ - -#include <uapi/asm/mman.h> - -#endif /* __S390_MMAN_H__ */ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 24bc41622a98..72e9ca83a668 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -12,6 +12,7 @@ #include <linux/mm_types.h> #include <asm/tlbflush.h> #include <asm/ctl_reg.h> +#include <asm-generic/mm_hooks.h> static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) @@ -33,7 +34,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.use_cmma = 0; #endif switch (mm->context.asce_limit) { - case 1UL << 42: + 
case _REGION2_SIZE: /* * forked 3-level task, fall through to set new asce with new * mm->pgd @@ -49,12 +50,12 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_REGION1; break; - case 1UL << 53: + case _REGION1_SIZE: /* forked 4-level task, set new asce with new mm->pgd */ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_REGION2; break; - case 1UL << 31: + case _REGION3_SIZE: /* forked 2-level compat task, set new asce with new mm->pgd */ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; @@ -138,30 +139,4 @@ static inline void activate_mm(struct mm_struct *prev, set_user_asce(next); } -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) -{ -} - -static inline void arch_exit_mmap(struct mm_struct *mm) -{ -} - -static inline void arch_unmap(struct mm_struct *mm, - struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ -} - -static inline void arch_bprm_mm_init(struct mm_struct *mm, - struct vm_area_struct *vma) -{ -} - -static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, - bool write, bool execute, bool foreign) -{ - /* by default, allow everything */ - return true; -} #endif /* __S390_MMU_CONTEXT_H */ diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index 9d91cf3e427f..c8e211b9a002 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -72,7 +72,7 @@ union mci { u64 ar : 1; /* 33 access register validity */ u64 da : 1; /* 34 delayed access exception */ u64 : 1; /* 35 */ - u64 gs : 1; /* 36 guarded storage registers */ + u64 gs : 1; /* 36 guarded storage registers validity */ u64 : 5; /* 37-41 */ u64 pr : 1; /* 42 tod programmable register validity */ u64 fc : 1; /* 43 fp control register validity */ diff --git a/arch/s390/include/asm/page-states.h 
b/arch/s390/include/asm/page-states.h index 42267a2fe29e..ca21b28a7b17 100644 --- a/arch/s390/include/asm/page-states.h +++ b/arch/s390/include/asm/page-states.h @@ -13,6 +13,7 @@ #define ESSA_SET_POT_VOLATILE 4 #define ESSA_SET_STABLE_RESIDENT 5 #define ESSA_SET_STABLE_IF_RESIDENT 6 +#define ESSA_SET_STABLE_NODAT 7 #define ESSA_MAX ESSA_SET_STABLE_IF_RESIDENT diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 624deaa44230..5d5c2b3500a4 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -10,10 +10,14 @@ #include <linux/const.h> #include <asm/types.h> +#define _PAGE_SHIFT 12 +#define _PAGE_SIZE (_AC(1, UL) << _PAGE_SHIFT) +#define _PAGE_MASK (~(_PAGE_SIZE - 1)) + /* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_SHIFT _PAGE_SHIFT +#define PAGE_SIZE _PAGE_SIZE +#define PAGE_MASK _PAGE_MASK #define PAGE_DEFAULT_ACC 0 #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) @@ -133,6 +137,9 @@ static inline int page_reset_referenced(unsigned long addr) struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); +void arch_set_page_dat(struct page *page, int order); +void arch_set_page_nodat(struct page *page, int order); +int arch_test_page_nodat(struct page *page); void arch_set_page_states(int make_stable); static inline int devmem_is_allowed(unsigned long pfn) @@ -145,16 +152,26 @@ static inline int devmem_is_allowed(unsigned long pfn) #endif /* !__ASSEMBLY__ */ -#define __PAGE_OFFSET 0x0UL -#define PAGE_OFFSET 0x0UL -#define __pa(x) (unsigned long)(x) -#define __va(x) (void *)(unsigned long)(x) -#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) -#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#define __PAGE_OFFSET 0x0UL +#define PAGE_OFFSET 0x0UL + 
+#define __pa(x) ((unsigned long)(x)) +#define __va(x) ((void *)(unsigned long)(x)) + +#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) #define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) + +#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define page_to_virt(page) pfn_to_virt(page_to_pfn(page)) +#define phys_to_pfn(kaddr) ((kaddr) >> PAGE_SHIFT) +#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT) + +#define phys_to_page(kaddr) pfn_to_page(phys_to_pfn(kaddr)) +#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) + +#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) + #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index bb0ff1bb0c4a..a0d9167519b1 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -15,6 +15,8 @@ #include <linux/gfp.h> #include <linux/mm.h> +#define CRST_ALLOC_ORDER 2 + unsigned long *crst_table_alloc(struct mm_struct *); void crst_table_free(struct mm_struct *, unsigned long *); @@ -42,16 +44,16 @@ static inline void clear_table(unsigned long *s, unsigned long val, size_t n) static inline void crst_table_init(unsigned long *crst, unsigned long entry) { - clear_table(crst, entry, sizeof(unsigned long)*2048); + clear_table(crst, entry, _CRST_TABLE_SIZE); } static inline unsigned long pgd_entry_type(struct mm_struct *mm) { - if (mm->context.asce_limit <= (1UL << 31)) + if (mm->context.asce_limit <= _REGION3_SIZE) return _SEGMENT_ENTRY_EMPTY; - if (mm->context.asce_limit <= (1UL << 42)) + if (mm->context.asce_limit <= _REGION2_SIZE) return _REGION3_ENTRY_EMPTY; - if (mm->context.asce_limit <= (1UL << 53)) + if (mm->context.asce_limit <= _REGION1_SIZE) return _REGION2_ENTRY_EMPTY; return _REGION1_ENTRY_EMPTY; } @@ -119,7 +121,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) if (!table) return NULL; - if (mm->context.asce_limit == (1UL << 31)) { + if 
(mm->context.asce_limit == _REGION3_SIZE) { /* Forking a compat process with 2 page table levels */ if (!pgtable_pmd_page_ctor(virt_to_page(table))) { crst_table_free(mm, table); @@ -131,7 +133,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { - if (mm->context.asce_limit == (1UL << 31)) + if (mm->context.asce_limit == _REGION3_SIZE) pgtable_pmd_page_dtor(virt_to_page(pgd)); crst_table_free(mm, (unsigned long *) pgd); } @@ -158,4 +160,8 @@ static inline void pmd_populate(struct mm_struct *mm, extern void rcu_table_freelist_finish(void); +void vmem_map_init(void); +void *vmem_crst_alloc(unsigned long val); +pte_t *vmem_pte_alloc(void); + #endif /* _S390_PGALLOC_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 57057fb1cc07..dce708e061ea 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -11,19 +11,6 @@ #ifndef _ASM_S390_PGTABLE_H #define _ASM_S390_PGTABLE_H -/* - * The Linux memory management assumes a three-level page table setup. - * For s390 64 bit we use up to four of the five levels the hardware - * provides (region first tables are not used). - * - * The "pgd_xxx()" functions are trivial for a folded two-level - * setup: the pgd is never bad, and a pmd always exists (as it's folded - * into the pgd entry) - * - * This file contains the functions and defines necessary to modify and use - * the S390 page table tree. - */ -#ifndef __ASSEMBLY__ #include <linux/sched.h> #include <linux/mm_types.h> #include <linux/page-flags.h> @@ -34,9 +21,6 @@ extern pgd_t swapper_pg_dir[]; extern void paging_init(void); -extern void vmem_map_init(void); -pmd_t *vmem_pmd_alloc(void); -pte_t *vmem_pte_alloc(void); enum { PG_DIRECT_MAP_4K = 0, @@ -77,38 +61,6 @@ extern unsigned long zero_page_mask; #define __HAVE_COLOR_ZERO_PAGE /* TODO: s390 cannot support io_remap_pfn_range... 
*/ -#endif /* !__ASSEMBLY__ */ - -/* - * PMD_SHIFT determines the size of the area a second-level page - * table can map - * PGDIR_SHIFT determines what a third-level page table entry can map - */ -#define PMD_SHIFT 20 -#define PUD_SHIFT 31 -#define P4D_SHIFT 42 -#define PGDIR_SHIFT 53 - -#define PMD_SIZE (1UL << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) -#define PUD_SIZE (1UL << PUD_SHIFT) -#define PUD_MASK (~(PUD_SIZE-1)) -#define P4D_SIZE (1UL << P4D_SHIFT) -#define P4D_MASK (~(P4D_SIZE-1)) -#define PGDIR_SIZE (1UL << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) - -/* - * entries per page directory level: the S390 is two-level, so - * we don't really have any PMD directory physically. - * for S390 segment-table entries are combined to one PGD - * that leads to 1024 pte per pgd - */ -#define PTRS_PER_PTE 256 -#define PTRS_PER_PMD 2048 -#define PTRS_PER_PUD 2048 -#define PTRS_PER_P4D 2048 -#define PTRS_PER_PGD 2048 #define FIRST_USER_ADDRESS 0UL @@ -123,7 +75,6 @@ extern unsigned long zero_page_mask; #define pgd_ERROR(e) \ printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e)) -#ifndef __ASSEMBLY__ /* * The vmalloc and module area will always be on the topmost area of the * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules. 
@@ -269,7 +220,7 @@ static inline int is_module_addr(void *addr) */ /* Bits in the segment/region table address-space-control-element */ -#define _ASCE_ORIGIN ~0xfffUL/* segment table origin */ +#define _ASCE_ORIGIN ~0xfffUL/* region/segment table origin */ #define _ASCE_PRIVATE_SPACE 0x100 /* private space control */ #define _ASCE_ALT_EVENT 0x80 /* storage alteration event control */ #define _ASCE_SPACE_SWITCH 0x40 /* space switch event */ @@ -320,9 +271,9 @@ static inline int is_module_addr(void *addr) #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL #define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ -#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */ -#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ -#define _SEGMENT_ENTRY_NOEXEC 0x100 /* region no-execute bit */ +#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* page table origin */ +#define _SEGMENT_ENTRY_PROTECT 0x200 /* segment protection bit */ +#define _SEGMENT_ENTRY_NOEXEC 0x100 /* segment no-execute bit */ #define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY (0) @@ -340,6 +291,54 @@ static inline int is_module_addr(void *addr) #define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */ #endif +#define _CRST_ENTRIES 2048 /* number of region/segment table entries */ +#define _PAGE_ENTRIES 256 /* number of page table entries */ + +#define _CRST_TABLE_SIZE (_CRST_ENTRIES * 8) +#define _PAGE_TABLE_SIZE (_PAGE_ENTRIES * 8) + +#define _REGION1_SHIFT 53 +#define _REGION2_SHIFT 42 +#define _REGION3_SHIFT 31 +#define _SEGMENT_SHIFT 20 + +#define _REGION1_INDEX (0x7ffUL << _REGION1_SHIFT) +#define _REGION2_INDEX (0x7ffUL << _REGION2_SHIFT) +#define _REGION3_INDEX (0x7ffUL << _REGION3_SHIFT) +#define _SEGMENT_INDEX (0x7ffUL << _SEGMENT_SHIFT) +#define _PAGE_INDEX (0xffUL << _PAGE_SHIFT) + +#define _REGION1_SIZE (1UL << _REGION1_SHIFT) +#define _REGION2_SIZE (1UL << 
_REGION2_SHIFT) +#define _REGION3_SIZE (1UL << _REGION3_SHIFT) +#define _SEGMENT_SIZE (1UL << _SEGMENT_SHIFT) + +#define _REGION1_MASK (~(_REGION1_SIZE - 1)) +#define _REGION2_MASK (~(_REGION2_SIZE - 1)) +#define _REGION3_MASK (~(_REGION3_SIZE - 1)) +#define _SEGMENT_MASK (~(_SEGMENT_SIZE - 1)) + +#define PMD_SHIFT _SEGMENT_SHIFT +#define PUD_SHIFT _REGION3_SHIFT +#define P4D_SHIFT _REGION2_SHIFT +#define PGDIR_SHIFT _REGION1_SHIFT + +#define PMD_SIZE _SEGMENT_SIZE +#define PUD_SIZE _REGION3_SIZE +#define P4D_SIZE _REGION2_SIZE +#define PGDIR_SIZE _REGION1_SIZE + +#define PMD_MASK _SEGMENT_MASK +#define PUD_MASK _REGION3_MASK +#define P4D_MASK _REGION2_MASK +#define PGDIR_MASK _REGION1_MASK + +#define PTRS_PER_PTE _PAGE_ENTRIES +#define PTRS_PER_PMD _CRST_ENTRIES +#define PTRS_PER_PUD _CRST_ENTRIES +#define PTRS_PER_P4D _CRST_ENTRIES +#define PTRS_PER_PGD _CRST_ENTRIES + /* * Segment table and region3 table entry encoding * (R = read-only, I = invalid, y = young bit): @@ -376,6 +375,7 @@ static inline int is_module_addr(void *addr) /* Guest Page State used for virtualization */ #define _PGSTE_GPS_ZERO 0x0000000080000000UL +#define _PGSTE_GPS_NODAT 0x0000000040000000UL #define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL #define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL #define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL @@ -505,7 +505,7 @@ static inline int mm_alloc_pgste(struct mm_struct *mm) * In the case that a guest uses storage keys * faults should no longer be backed by zero pages */ -#define mm_forbids_zeropage mm_use_skey +#define mm_forbids_zeropage mm_has_pgste static inline int mm_use_skey(struct mm_struct *mm) { #ifdef CONFIG_PGSTE @@ -952,15 +952,30 @@ static inline pte_t pte_mkhuge(pte_t pte) #define IPTE_GLOBAL 0 #define IPTE_LOCAL 1 -static inline void __ptep_ipte(unsigned long address, pte_t *ptep, int local) +#define IPTE_NODAT 0x400 +#define IPTE_GUEST_ASCE 0x800 + +static inline void __ptep_ipte(unsigned long address, pte_t *ptep, + unsigned 
long opt, unsigned long asce, + int local) { unsigned long pto = (unsigned long) ptep; - /* Invalidation + TLB flush for the pte */ + if (__builtin_constant_p(opt) && opt == 0) { + /* Invalidation + TLB flush for the pte */ + asm volatile( + " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]" + : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address), + [m4] "i" (local)); + return; + } + + /* Invalidate ptes with options + TLB flush of the ptes */ + opt = opt | (asce & _ASCE_ORIGIN); asm volatile( - " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]" - : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address), - [m4] "i" (local)); + " .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]" + : [r2] "+a" (address), [r3] "+a" (opt) + : [r1] "a" (pto), [m4] "i" (local) : "memory"); } static inline void __ptep_ipte_range(unsigned long address, int nr, @@ -1341,31 +1356,61 @@ static inline void __pmdp_csp(pmd_t *pmdp) #define IDTE_GLOBAL 0 #define IDTE_LOCAL 1 -static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp, int local) +#define IDTE_PTOA 0x0800 +#define IDTE_NODAT 0x1000 +#define IDTE_GUEST_ASCE 0x2000 + +static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, + unsigned long opt, unsigned long asce, + int local) { unsigned long sto; - sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); - asm volatile( - " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" - : "+m" (*pmdp) - : [r1] "a" (sto), [r2] "a" ((address & HPAGE_MASK)), - [m4] "i" (local) - : "cc" ); + sto = (unsigned long) pmdp - pmd_index(addr) * sizeof(pmd_t); + if (__builtin_constant_p(opt) && opt == 0) { + /* flush without guest asce */ + asm volatile( + " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" + : "+m" (*pmdp) + : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK)), + [m4] "i" (local) + : "cc" ); + } else { + /* flush with guest asce */ + asm volatile( + " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]" + : "+m" (*pmdp) + : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK) | opt), + [r3] "a" (asce), [m4] "i" (local) + : 
"cc" ); + } } -static inline void __pudp_idte(unsigned long address, pud_t *pudp, int local) +static inline void __pudp_idte(unsigned long addr, pud_t *pudp, + unsigned long opt, unsigned long asce, + int local) { unsigned long r3o; - r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t); + r3o = (unsigned long) pudp - pud_index(addr) * sizeof(pud_t); r3o |= _ASCE_TYPE_REGION3; - asm volatile( - " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" - : "+m" (*pudp) - : [r1] "a" (r3o), [r2] "a" ((address & PUD_MASK)), - [m4] "i" (local) - : "cc"); + if (__builtin_constant_p(opt) && opt == 0) { + /* flush without guest asce */ + asm volatile( + " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" + : "+m" (*pudp) + : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK)), + [m4] "i" (local) + : "cc"); + } else { + /* flush with guest asce */ + asm volatile( + " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]" + : "+m" (*pudp) + : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK) | opt), + [r3] "a" (asce), [m4] "i" (local) + : "cc" ); + } } pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t); @@ -1548,8 +1593,6 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#endif /* !__ASSEMBLY__ */ - #define kern_addr_valid(addr) (1) extern int vmem_add_mapping(unsigned long start, unsigned long size); diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 998b61cd0e56..eaee69e7c42a 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -80,7 +80,7 @@ struct qdr { u32 qkey : 4; u32 : 28; struct qdesfmt0 qdf0[126]; -} __attribute__ ((packed, aligned(4096))); +} __packed __aligned(PAGE_SIZE); #define QIB_AC_OUTBOUND_PCI_SUPPORTED 0x40 #define QIB_RFLAGS_ENABLE_QEBSM 0x80 diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index cd78155b1829..490e035b3716 100644 --- 
a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -29,8 +29,10 @@ #define MACHINE_FLAG_TE _BITUL(11) #define MACHINE_FLAG_TLB_LC _BITUL(12) #define MACHINE_FLAG_VX _BITUL(13) -#define MACHINE_FLAG_NX _BITUL(14) -#define MACHINE_FLAG_GS _BITUL(15) +#define MACHINE_FLAG_TLB_GUEST _BITUL(14) +#define MACHINE_FLAG_NX _BITUL(15) +#define MACHINE_FLAG_GS _BITUL(16) +#define MACHINE_FLAG_SCC _BITUL(17) #define LPP_MAGIC _BITUL(31) #define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) @@ -68,8 +70,10 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) #define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) #define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX) +#define MACHINE_HAS_TLB_GUEST (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST) #define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) #define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS) +#define MACHINE_HAS_SCC (S390_lowcore.machine_flags & MACHINE_FLAG_SCC) /* * Console mode. 
Override with conmode= @@ -104,9 +108,16 @@ extern void pfault_fini(void); #define pfault_fini() do { } while (0) #endif /* CONFIG_PFAULT */ +#ifdef CONFIG_VMCP +void vmcp_cma_reserve(void); +#else +static inline void vmcp_cma_reserve(void) { } +#endif + void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault); -extern void cmma_init(void); +void cmma_init(void); +void cmma_init_nodat(void); extern void (*_machine_restart)(char *command); extern void (*_machine_halt)(void); diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 217ee5210c32..8182b521c42f 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -92,10 +92,11 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp) { typecheck(int, lp->lock); asm volatile( - "st %1,%0\n" - : "+Q" (lp->lock) - : "d" (0) - : "cc", "memory"); +#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES + " .long 0xb2fa0070\n" /* NIAI 7 */ +#endif + " st %1,%0\n" + : "=Q" (lp->lock) : "d" (0) : "cc", "memory"); } /* diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 118535123f34..93f2eb3f277c 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -15,6 +15,8 @@ /* The value of the TOD clock for 1.1.1970. */ #define TOD_UNIX_EPOCH 0x7d91048bca000000ULL +extern u64 clock_comparator_max; + /* Inline functions for clock register access. 
*/ static inline int set_tod_clock(__u64 time) { @@ -126,7 +128,7 @@ static inline unsigned long long local_tick_disable(void) unsigned long long old; old = S390_lowcore.clock_comparator; - S390_lowcore.clock_comparator = -1ULL; + S390_lowcore.clock_comparator = clock_comparator_max; set_clock_comparator(S390_lowcore.clock_comparator); return old; } @@ -174,24 +176,24 @@ static inline cycles_t get_cycles(void) return (cycles_t) get_tod_clock() >> 2; } -int get_phys_clock(unsigned long long *clock); +int get_phys_clock(unsigned long *clock); void init_cpu_timer(void); unsigned long long monotonic_clock(void); -extern u64 sched_clock_base_cc; +extern unsigned char tod_clock_base[16] __aligned(8); /** * get_clock_monotonic - returns current time in clock rate units * * The caller must ensure that preemption is disabled. - * The clock and sched_clock_base get changed via stop_machine. + * The clock and tod_clock_base get changed via stop_machine. * Therefore preemption must be disabled when calling this * function, otherwise the returned value is not guaranteed to * be monotonic. 
*/ static inline unsigned long long get_tod_clock_monotonic(void) { - return get_tod_clock() - sched_clock_base_cc; + return get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; } /** @@ -218,4 +220,32 @@ static inline unsigned long long tod_to_ns(unsigned long long todval) return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9); } +/** + * tod_after - compare two 64 bit TOD values + * @a: first 64 bit TOD timestamp + * @b: second 64 bit TOD timestamp + * + * Returns: true if a is later than b + */ +static inline int tod_after(unsigned long long a, unsigned long long b) +{ + if (MACHINE_HAS_SCC) + return (long long) a > (long long) b; + return a > b; +} + +/** + * tod_after_eq - compare two 64 bit TOD values + * @a: first 64 bit TOD timestamp + * @b: second 64 bit TOD timestamp + * + * Returns: true if a is later than b + */ +static inline int tod_after_eq(unsigned long long a, unsigned long long b) +{ + if (MACHINE_HAS_SCC) + return (long long) a >= (long long) b; + return a >= b; +} + #endif diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 2eb8ff0d6fca..3a14b864b2e3 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -135,7 +135,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, unsigned long address) { - if (tlb->mm->context.asce_limit <= (1UL << 31)) + if (tlb->mm->context.asce_limit <= _REGION3_SIZE) return; pgtable_pmd_page_dtor(virt_to_page(pmd)); tlb_remove_table(tlb, pmd); @@ -151,7 +151,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, unsigned long address) { - if (tlb->mm->context.asce_limit <= (1UL << 53)) + if (tlb->mm->context.asce_limit <= _REGION1_SIZE) return; tlb_remove_table(tlb, p4d); } @@ -166,7 +166,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, static inline void 
pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, unsigned long address) { - if (tlb->mm->context.asce_limit <= (1UL << 42)) + if (tlb->mm->context.asce_limit <= _REGION2_SIZE) return; tlb_remove_table(tlb, pud); } diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 39846100682a..4d759f8f4bc7 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -20,10 +20,15 @@ static inline void __tlb_flush_local(void) */ static inline void __tlb_flush_idte(unsigned long asce) { + unsigned long opt; + + opt = IDTE_PTOA; + if (MACHINE_HAS_TLB_GUEST) + opt |= IDTE_GUEST_ASCE; /* Global TLB flush for the mm */ asm volatile( " .insn rrf,0xb98e0000,0,%0,%1,0" - : : "a" (2048), "a" (asce) : "cc"); + : : "a" (opt), "a" (asce) : "cc"); } #ifdef CONFIG_SMP diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index fa1bfce10370..5222da162b69 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -77,12 +77,6 @@ static inline const struct cpumask *cpumask_of_node(int node) return &node_to_cpumask_map[node]; } -/* - * Returns the number of the node containing node 'node'. This - * architecture is flat, so it is a pretty simple function! 
- */ -#define parent_node(node) (node) - #define pcibus_to_node(bus) __pcibus_to_node(bus) #define node_distance(a, b) __node_distance(a, b) diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h deleted file mode 100644 index 6740f4f9781f..000000000000 --- a/arch/s390/include/asm/types.h +++ /dev/null @@ -1,11 +0,0 @@ -/* - * S390 version - * - * Derived from "include/asm-i386/types.h" - */ -#ifndef _S390_TYPES_H -#define _S390_TYPES_H - -#include <uapi/asm/types.h> - -#endif /* _S390_TYPES_H */ diff --git a/arch/s390/include/asm/unaligned.h b/arch/s390/include/asm/unaligned.h deleted file mode 100644 index da9627afe5d8..000000000000 --- a/arch/s390/include/asm/unaligned.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef _ASM_S390_UNALIGNED_H -#define _ASM_S390_UNALIGNED_H - -/* - * The S390 can do unaligned accesses itself. - */ -#include <linux/unaligned/access_ok.h> -#include <linux/unaligned/generic.h> - -#define get_unaligned __get_unaligned_be -#define put_unaligned __put_unaligned_be - -#endif /* _ASM_S390_UNALIGNED_H */ diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index ca62066895e0..098f28778a13 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -9,4 +9,5 @@ generic-y += param.h generic-y += poll.h generic-y += resource.h generic-y += sockios.h +generic-y += swab.h generic-y += termbits.h diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h index 1340311dab77..ab5797cdc1b7 100644 --- a/arch/s390/include/uapi/asm/dasd.h +++ b/arch/s390/include/uapi/asm/dasd.h @@ -72,7 +72,10 @@ typedef struct dasd_information2_t { * 0x02: use diag discipline (diag) * 0x04: set the device initially online (internal use only) * 0x08: enable ERP related logging - * 0x20: give access to raw eckd data + * 0x10: allow I/O to fail on lost paths + * 0x20: allow I/O to fail when a lock was stolen + * 0x40: give access to raw eckd data + * 0x80: enable discard 
support */ #define DASD_FEATURE_DEFAULT 0x00 #define DASD_FEATURE_READONLY 0x01 @@ -82,6 +85,7 @@ typedef struct dasd_information2_t { #define DASD_FEATURE_FAILFAST 0x10 #define DASD_FEATURE_FAILONSLCK 0x20 #define DASD_FEATURE_USERAW 0x40 +#define DASD_FEATURE_DISCARD 0x80 #define DASD_PARTN_BITS 2 diff --git a/arch/s390/include/uapi/asm/swab.h b/arch/s390/include/uapi/asm/swab.h deleted file mode 100644 index da3bfe5cc161..000000000000 --- a/arch/s390/include/uapi/asm/swab.h +++ /dev/null @@ -1,89 +0,0 @@ -#ifndef _S390_SWAB_H -#define _S390_SWAB_H - -/* - * S390 version - * Copyright IBM Corp. 1999 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ - -#include <linux/types.h> - -#ifndef __s390x__ -# define __SWAB_64_THRU_32__ -#endif - -#ifdef __s390x__ -static inline __u64 __arch_swab64p(const __u64 *x) -{ - __u64 result; - - asm volatile("lrvg %0,%1" : "=d" (result) : "m" (*x)); - return result; -} -#define __arch_swab64p __arch_swab64p - -static inline __u64 __arch_swab64(__u64 x) -{ - __u64 result; - - asm volatile("lrvgr %0,%1" : "=d" (result) : "d" (x)); - return result; -} -#define __arch_swab64 __arch_swab64 - -static inline void __arch_swab64s(__u64 *x) -{ - *x = __arch_swab64p(x); -} -#define __arch_swab64s __arch_swab64s -#endif /* __s390x__ */ - -static inline __u32 __arch_swab32p(const __u32 *x) -{ - __u32 result; - - asm volatile( -#ifndef __s390x__ - " icm %0,8,%O1+3(%R1)\n" - " icm %0,4,%O1+2(%R1)\n" - " icm %0,2,%O1+1(%R1)\n" - " ic %0,%1" - : "=&d" (result) : "Q" (*x) : "cc"); -#else /* __s390x__ */ - " lrv %0,%1" - : "=d" (result) : "m" (*x)); -#endif /* __s390x__ */ - return result; -} -#define __arch_swab32p __arch_swab32p - -#ifdef __s390x__ -static inline __u32 __arch_swab32(__u32 x) -{ - __u32 result; - - asm volatile("lrvr %0,%1" : "=d" (result) : "d" (x)); - return result; -} -#define __arch_swab32 __arch_swab32 -#endif /* __s390x__ */ - -static inline __u16 __arch_swab16p(const __u16 *x) -{ - __u16 result; - - asm 
volatile( -#ifndef __s390x__ - " icm %0,2,%O1+1(%R1)\n" - " ic %0,%1\n" - : "=&d" (result) : "Q" (*x) : "cc"); -#else /* __s390x__ */ - " lrvh %0,%1" - : "=d" (result) : "m" (*x)); -#endif /* __s390x__ */ - return result; -} -#define __arch_swab16p __arch_swab16p - -#endif /* _S390_SWAB_H */ diff --git a/drivers/s390/char/vmcp.h b/arch/s390/include/uapi/asm/vmcp.h index 1e29b0418382..4caf71714a55 100644 --- a/drivers/s390/char/vmcp.h +++ b/arch/s390/include/uapi/asm/vmcp.h @@ -12,19 +12,13 @@ * The idea of this driver is based on cpint from Neale Ferguson */ +#ifndef _UAPI_ASM_VMCP_H +#define _UAPI_ASM_VMCP_H + #include <linux/ioctl.h> -#include <linux/mutex.h> -#define VMCP_GETCODE _IOR(0x10, 1, int) -#define VMCP_SETBUF _IOW(0x10, 2, int) -#define VMCP_GETSIZE _IOR(0x10, 3, int) +#define VMCP_GETCODE _IOR(0x10, 1, int) +#define VMCP_SETBUF _IOW(0x10, 2, int) +#define VMCP_GETSIZE _IOR(0x10, 3, int) -struct vmcp_session { - unsigned int bufsize; - char *response; - int resp_size; - int resp_code; - /* As we use copy_from/to_user, which might * - * sleep and cannot use a spinlock */ - struct mutex mutex; -}; +#endif /* _UAPI_ASM_VMCP_H */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index b65c414b6c0e..3d42f91c95fd 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -158,6 +158,7 @@ int main(void) OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); OFFSET(__LC_INT_CLOCK, lowcore, int_clock); OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock); + OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock); OFFSET(__LC_CURRENT, lowcore, current_task); OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); OFFSET(__LC_ASYNC_STACK, lowcore, async_stack); diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 9f0e4a2785f7..63bc6603e0ed 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -14,6 +14,7 @@ #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> +#include 
<linux/mm.h> #include <asm/diag.h> #include <asm/ebcdic.h> #include <asm/cpcmd.h> @@ -28,9 +29,7 @@ static int diag8_noresponse(int cmdlen) register unsigned long reg3 asm ("3") = cmdlen; asm volatile( - " sam31\n" " diag %1,%0,0x8\n" - " sam64\n" : "+d" (reg3) : "d" (reg2) : "cc"); return reg3; } @@ -43,9 +42,7 @@ static int diag8_response(int cmdlen, char *response, int *rlen) register unsigned long reg5 asm ("5") = *rlen; asm volatile( - " sam31\n" " diag %2,%0,0x8\n" - " sam64\n" " brc 8,1f\n" " agr %1,%4\n" "1:\n" @@ -57,7 +54,6 @@ static int diag8_response(int cmdlen, char *response, int *rlen) /* * __cpcmd has some restrictions over cpcmd - * - the response buffer must reside below 2GB (if any) * - __cpcmd is unlocked and therefore not SMP-safe */ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) @@ -88,13 +84,12 @@ EXPORT_SYMBOL(__cpcmd); int cpcmd(const char *cmd, char *response, int rlen, int *response_code) { + unsigned long flags; char *lowbuf; int len; - unsigned long flags; - if ((virt_to_phys(response) != (unsigned long) response) || - (((unsigned long)response + rlen) >> 31)) { - lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA); + if (is_vmalloc_or_module_addr(response)) { + lowbuf = kmalloc(rlen, GFP_KERNEL); if (!lowbuf) { pr_warn("The cpcmd kernel function failed to allocate a response buffer\n"); return -ENOMEM; diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 86b3e74f569e..1d9e83c401fc 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -866,7 +866,8 @@ static inline void debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level, int exception) { - active->id.stck = get_tod_clock_fast() - sched_clock_base_cc; + active->id.stck = get_tod_clock_fast() - + *(unsigned long long *) &tod_clock_base[1]; active->id.fields.cpuid = smp_processor_id(); active->caller = __builtin_return_address(0); active->id.fields.exception = exception; @@ -1455,15 +1456,15 @@ int 
debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, int area, debug_entry_t * entry, char *out_buf) { - unsigned long sec, usec; + unsigned long base, sec, usec; char *except_str; unsigned long caller; int rc = 0; unsigned int level; level = entry->id.fields.level; - sec = (entry->id.stck >> 12) + (sched_clock_base_cc >> 12); - sec = sec - (TOD_UNIX_EPOCH >> 12); + base = (*(unsigned long *) &tod_clock_base[0]) >> 4; + sec = (entry->id.stck >> 12) + base - (TOD_UNIX_EPOCH >> 12); usec = do_div(sec, USEC_PER_SEC); if (entry->id.fields.exception) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index dab78babfab6..2aa545dca4d5 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -76,7 +76,7 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task, frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); #ifdef CONFIG_CHECK_STACK sp = __dump_trace(func, data, sp, - S390_lowcore.panic_stack + frame_size - 4096, + S390_lowcore.panic_stack + frame_size - PAGE_SIZE, S390_lowcore.panic_stack + frame_size); #endif sp = __dump_trace(func, data, sp, diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 5d20182ee8ae..ca8cd80e8feb 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -53,8 +53,9 @@ static void __init reset_tod_clock(void) if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0) disabled_wait(0); - sched_clock_base_cc = TOD_UNIX_EPOCH; - S390_lowcore.last_update_clock = sched_clock_base_cc; + memset(tod_clock_base, 0, 16); + *(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH; + S390_lowcore.last_update_clock = TOD_UNIX_EPOCH; } #ifdef CONFIG_SHARED_KERNEL @@ -165,8 +166,8 @@ static noinline __init void create_kernel_nss(void) } /* re-initialize cputime accounting. 
*/ - sched_clock_base_cc = get_tod_clock(); - S390_lowcore.last_update_clock = sched_clock_base_cc; + get_tod_clock_ext(tod_clock_base); + S390_lowcore.last_update_clock = *(__u64 *) &tod_clock_base[1]; S390_lowcore.last_update_timer = 0x7fffffffffffffffULL; S390_lowcore.user_timer = 0; S390_lowcore.system_timer = 0; @@ -387,6 +388,12 @@ static __init void detect_machine_facilities(void) } if (test_facility(133)) S390_lowcore.machine_flags |= MACHINE_FLAG_GS; + if (test_facility(139) && (tod_clock_base[1] & 0x80)) { + /* Enabled signed clock comparator comparisons */ + S390_lowcore.machine_flags |= MACHINE_FLAG_SCC; + clock_comparator_max = -1ULL >> 1; + __ctl_set_bit(0, 53); + } } static inline void save_vector_registers(void) @@ -413,7 +420,7 @@ static int __init disable_vector_extension(char *str) { S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX; __ctl_clear_bit(0, 17); - return 1; + return 0; } early_param("novx", disable_vector_extension); diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index eff5b31671d4..8ed753c72d9b 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -302,7 +302,8 @@ ENTRY(startup_kdump) xc 0xe00(256),0xe00 xc 0xf00(256),0xf00 lctlg %c0,%c15,0x200(%r0) # initialize control registers - stck __LC_LAST_UPDATE_CLOCK + stcke __LC_BOOT_CLOCK + mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1 spt 6f-.LPG0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) l %r15,.Lstack-.LPG0(%r13) diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 31c91f24e562..0d8f2a858ced 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -21,8 +21,8 @@ ENTRY(startup_continue) xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid mvi __LC_LPP,0x80 # and set LPP_MAGIC .insn s,0xb2800000,__LC_LPP # load program parameter -0: larl %r1,sched_clock_base_cc - mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK +0: larl %r1,tod_clock_base + mvc 0(16,%r1),__LC_BOOT_CLOCK larl %r13,.LPG1 # get base lctlg 
%c0,%c15,.Lctl-.LPG1(%r13) # load control registers lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 6dca93b29bed..a2fdff0e730b 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -105,7 +105,8 @@ void do_IRQ(struct pt_regs *regs, int irq) old_regs = set_irq_regs(regs); irq_enter(); - if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) + if (tod_after_eq(S390_lowcore.int_clock, + S390_lowcore.clock_comparator)) /* Serve timer interrupts first. */ clock_comparator_work(); generic_handle_irq(irq); diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S index cfac28330b03..4bdc65636603 100644 --- a/arch/s390/kernel/relocate_kernel.S +++ b/arch/s390/kernel/relocate_kernel.S @@ -7,6 +7,7 @@ */ #include <linux/linkage.h> +#include <asm/page.h> #include <asm/sigp.h> /* @@ -55,8 +56,8 @@ ENTRY(relocate_kernel) .back_pgm: lmg %r0,%r15,gprregs-.base(%r13) .top: - lghi %r7,4096 # load PAGE_SIZE in r7 - lghi %r9,4096 # load PAGE_SIZE in r9 + lghi %r7,PAGE_SIZE # load PAGE_SIZE in r7 + lghi %r9,PAGE_SIZE # load PAGE_SIZE in r9 lg %r5,0(%r2) # read another word for indirection page aghi %r2,8 # increment pointer tml %r5,0x1 # is it a destination page? 
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 3d1d808ea8a9..164a1e16b53e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -305,7 +305,7 @@ static void __init setup_lowcore(void) /* * Setup lowcore for boot cpu */ - BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * 4096); + BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE); lc = memblock_virt_alloc_low(sizeof(*lc), sizeof(*lc)); lc->restart_psw.mask = PSW_KERNEL_BITS; lc->restart_psw.addr = (unsigned long) restart_int_handler; @@ -323,7 +323,7 @@ static void __init setup_lowcore(void) lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_MCHECK; lc->io_new_psw.addr = (unsigned long) io_int_handler; - lc->clock_comparator = -1ULL; + lc->clock_comparator = clock_comparator_max; lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->async_stack = (unsigned long) @@ -469,10 +469,10 @@ static void __init setup_memory_end(void) vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN; tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; tmp = tmp * (sizeof(struct page) + PAGE_SIZE); - if (tmp + vmalloc_size + MODULES_LEN <= (1UL << 42)) - vmax = 1UL << 42; /* 3-level kernel page table */ + if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE) + vmax = _REGION2_SIZE; /* 3-level kernel page table */ else - vmax = 1UL << 53; /* 4-level kernel page table */ + vmax = _REGION1_SIZE; /* 4-level kernel page table */ /* module area is at the end of the kernel address space. 
*/ MODULES_END = vmax; MODULES_VADDR = MODULES_END - MODULES_LEN; @@ -818,6 +818,9 @@ static int __init setup_hwcaps(void) case 0x2965: strcpy(elf_platform, "z13"); break; + case 0x3906: + strcpy(elf_platform, "z14"); + break; } /* @@ -922,6 +925,7 @@ void __init setup_arch(char **cmdline_p) setup_memory_end(); setup_memory(); dma_contiguous_reserve(memory_end); + vmcp_cma_reserve(); check_initrd(); reserve_crashkernel(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 1020a11a24e5..1cee6753d47a 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1181,6 +1181,7 @@ static int __init s390_smp_init(void) rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online", smp_cpu_online, smp_cpu_pre_down); + rc = rc <= 0 ? rc : 0; out: return rc; } diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index 39e2f41b6cf0..c8ea715bfe10 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -98,10 +98,16 @@ int page_key_alloc(unsigned long pages) */ void page_key_read(unsigned long *pfn) { + struct page *page; unsigned long addr; - - addr = (unsigned long) page_address(pfn_to_page(*pfn)); - *(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr); + unsigned char key; + + page = pfn_to_page(*pfn); + addr = (unsigned long) page_address(page); + key = (unsigned char) page_get_storage_key(addr) & 0x7f; + if (arch_test_page_nodat(page)) + key |= 0x80; + *(unsigned char *) pfn = key; } /* @@ -126,8 +132,16 @@ void page_key_memorize(unsigned long *pfn) */ void page_key_write(void *address) { - page_set_storage_key((unsigned long) address, - page_key_rp->data[page_key_rx], 0); + struct page *page; + unsigned char key; + + key = page_key_rp->data[page_key_rx]; + page_set_storage_key((unsigned long) address, key & 0x7f, 0); + page = virt_to_page(address); + if (key & 0x80) + arch_set_page_nodat(page, 0); + else + arch_set_page_dat(page, 0); if (++page_key_rx >= PAGE_KEY_DATA_SIZE) return; page_key_rp = 
page_key_rp->next; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 192efdfac918..5cbd52169348 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -51,8 +51,15 @@ #include <asm/cio.h> #include "entry.h" -u64 sched_clock_base_cc = -1; /* Force to data section. */ -EXPORT_SYMBOL_GPL(sched_clock_base_cc); +unsigned char tod_clock_base[16] __aligned(8) = { + /* Force to data section. */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +EXPORT_SYMBOL_GPL(tod_clock_base); + +u64 clock_comparator_max = -1ULL; +EXPORT_SYMBOL_GPL(clock_comparator_max); static DEFINE_PER_CPU(struct clock_event_device, comparators); @@ -75,7 +82,7 @@ void __init time_early_init(void) struct ptff_qui qui; /* Initialize TOD steering parameters */ - tod_steering_end = sched_clock_base_cc; + tod_steering_end = *(unsigned long long *) &tod_clock_base[1]; vdso_data->ts_end = tod_steering_end; if (!test_facility(28)) @@ -111,22 +118,27 @@ unsigned long long monotonic_clock(void) } EXPORT_SYMBOL(monotonic_clock); -static void tod_to_timeval(__u64 todval, struct timespec64 *xt) +static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt) { - unsigned long long sec; + unsigned long long high, low, rem, sec, nsec; + + /* Split extendnd TOD clock to micro-seconds and sub-micro-seconds */ + high = (*(unsigned long long *) clk) >> 4; + low = (*(unsigned long long *)&clk[7]) << 4; + /* Calculate seconds and nano-seconds */ + sec = high; + rem = do_div(sec, 1000000); + nsec = (((low >> 32) + (rem << 32)) * 1000) >> 32; - sec = todval >> 12; - do_div(sec, 1000000); xt->tv_sec = sec; - todval -= (sec * 1000000) << 12; - xt->tv_nsec = ((todval * 1000) >> 12); + xt->tv_nsec = nsec; } void clock_comparator_work(void) { struct clock_event_device *cd; - S390_lowcore.clock_comparator = -1ULL; + S390_lowcore.clock_comparator = clock_comparator_max; cd = this_cpu_ptr(&comparators); cd->event_handler(cd); } @@ 
-148,7 +160,7 @@ void init_cpu_timer(void) struct clock_event_device *cd; int cpu; - S390_lowcore.clock_comparator = -1ULL; + S390_lowcore.clock_comparator = clock_comparator_max; set_clock_comparator(S390_lowcore.clock_comparator); cpu = smp_processor_id(); @@ -179,7 +191,7 @@ static void clock_comparator_interrupt(struct ext_code ext_code, unsigned long param64) { inc_irq_stat(IRQEXT_CLK); - if (S390_lowcore.clock_comparator == -1ULL) + if (S390_lowcore.clock_comparator == clock_comparator_max) set_clock_comparator(S390_lowcore.clock_comparator); } @@ -197,18 +209,28 @@ static void stp_reset(void); void read_persistent_clock64(struct timespec64 *ts) { - __u64 clock; + unsigned char clk[STORE_CLOCK_EXT_SIZE]; + __u64 delta; - clock = get_tod_clock() - initial_leap_seconds; - tod_to_timeval(clock - TOD_UNIX_EPOCH, ts); + delta = initial_leap_seconds + TOD_UNIX_EPOCH; + get_tod_clock_ext(clk); + *(__u64 *) &clk[1] -= delta; + if (*(__u64 *) &clk[1] > delta) + clk[0]--; + ext_to_timespec64(clk, ts); } void read_boot_clock64(struct timespec64 *ts) { - __u64 clock; + unsigned char clk[STORE_CLOCK_EXT_SIZE]; + __u64 delta; - clock = sched_clock_base_cc - initial_leap_seconds; - tod_to_timeval(clock - TOD_UNIX_EPOCH, ts); + delta = initial_leap_seconds + TOD_UNIX_EPOCH; + memcpy(clk, tod_clock_base, 16); + *(__u64 *) &clk[1] -= delta; + if (*(__u64 *) &clk[1] > delta) + clk[0]--; + ext_to_timespec64(clk, ts); } static u64 read_tod_clock(struct clocksource *cs) @@ -335,7 +357,7 @@ static unsigned long clock_sync_flags; * source. If the clock mode is local it will return -EOPNOTSUPP and * -EAGAIN if the clock is not in sync with the external reference. */ -int get_phys_clock(unsigned long long *clock) +int get_phys_clock(unsigned long *clock) { atomic_t *sw_ptr; unsigned int sw0, sw1; @@ -406,7 +428,10 @@ static void clock_sync_global(unsigned long long delta) struct ptff_qto qto; /* Fixup the monotonic sched clock. 
*/ - sched_clock_base_cc += delta; + *(unsigned long long *) &tod_clock_base[1] += delta; + if (*(unsigned long long *) &tod_clock_base[1] < delta) + /* Epoch overflow */ + tod_clock_base[0]++; /* Adjust TOD steering parameters. */ vdso_data->tb_update_count++; now = get_tod_clock(); @@ -437,7 +462,7 @@ static void clock_sync_global(unsigned long long delta) static void clock_sync_local(unsigned long long delta) { /* Add the delta to the clock comparator. */ - if (S390_lowcore.clock_comparator != -1ULL) { + if (S390_lowcore.clock_comparator != clock_comparator_max) { S390_lowcore.clock_comparator += delta; set_clock_comparator(S390_lowcore.clock_comparator); } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index b89d19f6f2ab..eacda05b45d7 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -157,6 +157,8 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore) page_frame = get_zeroed_page(GFP_KERNEL); if (!segment_table || !page_table || !page_frame) goto out; + arch_set_page_dat(virt_to_page(segment_table), SEGMENT_ORDER); + arch_set_page_dat(virt_to_page(page_table), 0); /* Initialize per-cpu vdso data page */ vd = (struct vdso_per_cpu_data *) page_frame; diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S index 8f048c2d6d13..263a7f9eee1e 100644 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -2,6 +2,8 @@ * This is the infamous ld script for the 32 bits vdso * library */ + +#include <asm/page.h> #include <asm/vdso.h> OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") @@ -91,7 +93,7 @@ SECTIONS .debug_ranges 0 : { *(.debug_ranges) } .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } - . = ALIGN(4096); + . 
= ALIGN(PAGE_SIZE); PROVIDE(_vdso_data = .); /DISCARD/ : { diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index f35455d497fe..9e3dbbcc1cfc 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -2,6 +2,8 @@ * This is the infamous ld script for the 64 bits vdso * library */ + +#include <asm/page.h> #include <asm/vdso.h> OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") @@ -91,7 +93,7 @@ SECTIONS .debug_ranges 0 : { *(.debug_ranges) } .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); PROVIDE(_vdso_data = .); /DISCARD/ : { diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index ce865bd4f81d..e4d36094aceb 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -27,7 +27,7 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) unsigned long prefix = kvm_s390_get_prefix(vcpu); start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; - end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; + end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + PAGE_SIZE; vcpu->stat.diagnose_10++; if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end @@ -51,9 +51,9 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) */ gmap_discard(vcpu->arch.gmap, start, prefix); if (start <= prefix) - gmap_discard(vcpu->arch.gmap, 0, 4096); - if (end > prefix + 4096) - gmap_discard(vcpu->arch.gmap, 4096, 8192); + gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE); + if (end > prefix + PAGE_SIZE) + gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE); gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end); } return 0; diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 653cae5e1ee1..3cc77391a102 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -629,7 +629,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, iep = ctlreg0.iep && 
test_kvm_facility(vcpu->kvm, 130); if (asce.r) goto real_address; - ptr = asce.origin * 4096; + ptr = asce.origin * PAGE_SIZE; switch (asce.dt) { case ASCE_TYPE_REGION1: if (vaddr.rfx01 > asce.tl) @@ -674,7 +674,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, return PGM_REGION_SECOND_TRANS; if (edat1) dat_protection |= rfte.p; - ptr = rfte.rto * 4096 + vaddr.rsx * 8; + ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8; } /* fallthrough */ case ASCE_TYPE_REGION2: { @@ -692,7 +692,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, return PGM_REGION_THIRD_TRANS; if (edat1) dat_protection |= rste.p; - ptr = rste.rto * 4096 + vaddr.rtx * 8; + ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8; } /* fallthrough */ case ASCE_TYPE_REGION3: { @@ -720,7 +720,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, return PGM_SEGMENT_TRANSLATION; if (edat1) dat_protection |= rtte.fc0.p; - ptr = rtte.fc0.sto * 4096 + vaddr.sx * 8; + ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8; } /* fallthrough */ case ASCE_TYPE_SEGMENT: { @@ -743,7 +743,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, goto absolute_address; } dat_protection |= ste.fc0.p; - ptr = ste.fc0.pto * 2048 + vaddr.px * 8; + ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8; } } if (kvm_is_error_gpa(vcpu->kvm, ptr)) @@ -993,7 +993,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, parent = sg->parent; vaddr.addr = saddr; asce.val = sg->orig_asce; - ptr = asce.origin * 4096; + ptr = asce.origin * PAGE_SIZE; if (asce.r) { *fake = 1; ptr = 0; @@ -1029,7 +1029,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, union region1_table_entry rfte; if (*fake) { - ptr += (unsigned long) vaddr.rfx << 53; + ptr += vaddr.rfx * _REGION1_SIZE; rfte.val = ptr; goto shadow_r2t; } @@ -1044,7 +1044,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long 
saddr, return PGM_REGION_SECOND_TRANS; if (sg->edat_level >= 1) *dat_protection |= rfte.p; - ptr = rfte.rto << 12UL; + ptr = rfte.rto * PAGE_SIZE; shadow_r2t: rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake); if (rc) @@ -1055,7 +1055,7 @@ shadow_r2t: union region2_table_entry rste; if (*fake) { - ptr += (unsigned long) vaddr.rsx << 42; + ptr += vaddr.rsx * _REGION2_SIZE; rste.val = ptr; goto shadow_r3t; } @@ -1070,7 +1070,7 @@ shadow_r2t: return PGM_REGION_THIRD_TRANS; if (sg->edat_level >= 1) *dat_protection |= rste.p; - ptr = rste.rto << 12UL; + ptr = rste.rto * PAGE_SIZE; shadow_r3t: rste.p |= *dat_protection; rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake); @@ -1082,7 +1082,7 @@ shadow_r3t: union region3_table_entry rtte; if (*fake) { - ptr += (unsigned long) vaddr.rtx << 31; + ptr += vaddr.rtx * _REGION3_SIZE; rtte.val = ptr; goto shadow_sgt; } @@ -1098,7 +1098,7 @@ shadow_r3t: if (rtte.fc && sg->edat_level >= 2) { *dat_protection |= rtte.fc0.p; *fake = 1; - ptr = rtte.fc1.rfaa << 31UL; + ptr = rtte.fc1.rfaa * _REGION3_SIZE; rtte.val = ptr; goto shadow_sgt; } @@ -1106,7 +1106,7 @@ shadow_r3t: return PGM_SEGMENT_TRANSLATION; if (sg->edat_level >= 1) *dat_protection |= rtte.fc0.p; - ptr = rtte.fc0.sto << 12UL; + ptr = rtte.fc0.sto * PAGE_SIZE; shadow_sgt: rtte.fc0.p |= *dat_protection; rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake); @@ -1118,7 +1118,7 @@ shadow_sgt: union segment_table_entry ste; if (*fake) { - ptr += (unsigned long) vaddr.sx << 20; + ptr += vaddr.sx * _SEGMENT_SIZE; ste.val = ptr; goto shadow_pgt; } @@ -1134,11 +1134,11 @@ shadow_sgt: *dat_protection |= ste.fc0.p; if (ste.fc && sg->edat_level >= 1) { *fake = 1; - ptr = ste.fc1.sfaa << 20UL; + ptr = ste.fc1.sfaa * _SEGMENT_SIZE; ste.val = ptr; goto shadow_pgt; } - ptr = ste.fc0.pto << 11UL; + ptr = ste.fc0.pto * (PAGE_SIZE / 2); shadow_pgt: ste.fc0.p |= *dat_protection; rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake); @@ -1187,8 +1187,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, 
struct gmap *sg, vaddr.addr = saddr; if (fake) { - /* offset in 1MB guest memory block */ - pte.val = pgt + ((unsigned long) vaddr.px << 12UL); + pte.val = pgt + vaddr.px * PAGE_SIZE; goto shadow_page; } if (!rc) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 8a1dac793d6b..785ad028bde6 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -329,7 +329,7 @@ static int handle_sske(struct kvm_vcpu *vcpu) start = kvm_s390_logical_to_effective(vcpu, start); if (m3 & SSKE_MB) { /* start already designates an absolute address */ - end = (start + (1UL << 20)) & ~((1UL << 20) - 1); + end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1); } else { start = kvm_s390_real_to_abs(vcpu, start); end = start + PAGE_SIZE; @@ -893,10 +893,10 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) case 0x00000000: /* only 4k frames specify a real address */ start = kvm_s390_real_to_abs(vcpu, start); - end = (start + (1UL << 12)) & ~((1UL << 12) - 1); + end = (start + PAGE_SIZE) & ~(PAGE_SIZE - 1); break; case 0x00001000: - end = (start + (1UL << 20)) & ~((1UL << 20) - 1); + end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1); break; case 0x00002000: /* only support 2G frame size if EDAT2 is available and we are @@ -904,7 +904,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) if (!test_kvm_facility(vcpu->kvm, 78) || psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_24BIT) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - end = (start + (1UL << 31)) & ~((1UL << 31) - 1); + end = (start + _REGION3_SIZE) & ~(_REGION3_SIZE - 1); break; default: return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 715c19c45d9a..ba8203e4d516 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1069,7 +1069,7 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - 
BUILD_BUG_ON(sizeof(struct vsie_page) != 4096); + BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE); scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL); /* 512 byte alignment */ diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index 92e90e40b6fb..7f17555ad4d5 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -57,7 +57,7 @@ static void __udelay_enabled(unsigned long long usecs) end = get_tod_clock_fast() + (usecs << 12); do { clock_saved = 0; - if (end < S390_lowcore.clock_comparator) { + if (tod_after(S390_lowcore.clock_comparator, end)) { clock_saved = local_tick_disable(); set_clock_comparator(end); } diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index ffb15bd4c593..b12663d653d8 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -32,42 +32,63 @@ static int __init spin_retry_setup(char *str) } __setup("spin_retry=", spin_retry_setup); +static inline int arch_load_niai4(int *lock) +{ + int owner; + + asm volatile( +#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES + " .long 0xb2fa0040\n" /* NIAI 4 */ +#endif + " l %0,%1\n" + : "=d" (owner) : "Q" (*lock) : "memory"); + return owner; +} + +static inline int arch_cmpxchg_niai8(int *lock, int old, int new) +{ + int expected = old; + + asm volatile( +#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES + " .long 0xb2fa0080\n" /* NIAI 8 */ +#endif + " cs %0,%3,%1\n" + : "=d" (old), "=Q" (*lock) + : "0" (old), "d" (new), "Q" (*lock) + : "cc", "memory"); + return expected == old; +} + void arch_spin_lock_wait(arch_spinlock_t *lp) { int cpu = SPINLOCK_LOCKVAL; - int owner, count, first_diag; + int owner, count; + + /* Pass the virtual CPU to the lock holder if it is not running */ + owner = arch_load_niai4(&lp->lock); + if (owner && arch_vcpu_is_preempted(~owner)) + smp_yield_cpu(~owner); - first_diag = 1; + count = spin_retry; while (1) { - owner = ACCESS_ONCE(lp->lock); + owner = arch_load_niai4(&lp->lock); /* Try to get the lock if it is free. 
*/ if (!owner) { - if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu)) + if (arch_cmpxchg_niai8(&lp->lock, 0, cpu)) return; continue; } - /* First iteration: check if the lock owner is running. */ - if (first_diag && arch_vcpu_is_preempted(~owner)) { - smp_yield_cpu(~owner); - first_diag = 0; + if (count-- >= 0) continue; - } - /* Loop for a while on the lock value. */ count = spin_retry; - do { - owner = ACCESS_ONCE(lp->lock); - } while (owner && count-- > 0); - if (!owner) - continue; /* * For multiple layers of hypervisors, e.g. z/VM + LPAR * yield the CPU unconditionally. For LPAR rely on the * sense running status. */ - if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) { + if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) smp_yield_cpu(~owner); - first_diag = 0; - } } } EXPORT_SYMBOL(arch_spin_lock_wait); @@ -75,42 +96,36 @@ EXPORT_SYMBOL(arch_spin_lock_wait); void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) { int cpu = SPINLOCK_LOCKVAL; - int owner, count, first_diag; + int owner, count; local_irq_restore(flags); - first_diag = 1; + + /* Pass the virtual CPU to the lock holder if it is not running */ + owner = arch_load_niai4(&lp->lock); + if (owner && arch_vcpu_is_preempted(~owner)) + smp_yield_cpu(~owner); + + count = spin_retry; while (1) { - owner = ACCESS_ONCE(lp->lock); + owner = arch_load_niai4(&lp->lock); /* Try to get the lock if it is free. */ if (!owner) { local_irq_disable(); - if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu)) + if (arch_cmpxchg_niai8(&lp->lock, 0, cpu)) return; local_irq_restore(flags); continue; } - /* Check if the lock owner is running. */ - if (first_diag && arch_vcpu_is_preempted(~owner)) { - smp_yield_cpu(~owner); - first_diag = 0; + if (count-- >= 0) continue; - } - /* Loop for a while on the lock value. */ count = spin_retry; - do { - owner = ACCESS_ONCE(lp->lock); - } while (owner && count-- > 0); - if (!owner) - continue; /* * For multiple layers of hypervisors, e.g. 
z/VM + LPAR * yield the CPU unconditionally. For LPAR rely on the * sense running status. */ - if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) { + if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) smp_yield_cpu(~owner); - first_diag = 0; - } } } EXPORT_SYMBOL(arch_spin_lock_wait_flags); diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index b3bd3f23b8e8..4ea9106417ee 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -15,8 +15,30 @@ #include <asm/mmu_context.h> #include <asm/facility.h> +#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES static DEFINE_STATIC_KEY_FALSE(have_mvcos); +static int __init uaccess_init(void) +{ + if (test_facility(27)) + static_branch_enable(&have_mvcos); + return 0; +} +early_initcall(uaccess_init); + +static inline int copy_with_mvcos(void) +{ + if (static_branch_likely(&have_mvcos)) + return 1; + return 0; +} +#else +static inline int copy_with_mvcos(void) +{ + return 1; +} +#endif + static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, unsigned long size) { @@ -84,7 +106,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { - if (static_branch_likely(&have_mvcos)) + if (copy_with_mvcos()) return copy_from_user_mvcos(to, from, n); return copy_from_user_mvcp(to, from, n); } @@ -157,7 +179,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { - if (static_branch_likely(&have_mvcos)) + if (copy_with_mvcos()) return copy_to_user_mvcos(to, from, n); return copy_to_user_mvcs(to, from, n); } @@ -220,7 +242,7 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { - if (static_branch_likely(&have_mvcos)) + if (copy_with_mvcos()) return 
copy_in_user_mvcos(to, from, n); return copy_in_user_mvc(to, from, n); } @@ -292,7 +314,7 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size) unsigned long __clear_user(void __user *to, unsigned long size) { - if (static_branch_likely(&have_mvcos)) + if (copy_with_mvcos()) return clear_user_mvcos(to, size); return clear_user_xc(to, size); } @@ -349,11 +371,3 @@ long __strncpy_from_user(char *dst, const char __user *src, long size) return done; } EXPORT_SYMBOL(__strncpy_from_user); - -static int __init uaccess_init(void) -{ - if (test_facility(27)) - static_branch_enable(&have_mvcos); - return 0; -} -early_initcall(uaccess_init); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 14f25798b001..bdabb013537b 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -135,7 +135,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address) pr_alert("AS:%016lx ", asce); switch (asce & _ASCE_TYPE_MASK) { case _ASCE_TYPE_REGION1: - table = table + ((address >> 53) & 0x7ff); + table += (address & _REGION1_INDEX) >> _REGION1_SHIFT; if (bad_address(table)) goto bad; pr_cont("R1:%016lx ", *table); @@ -144,7 +144,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address) table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* fallthrough */ case _ASCE_TYPE_REGION2: - table = table + ((address >> 42) & 0x7ff); + table += (address & _REGION2_INDEX) >> _REGION2_SHIFT; if (bad_address(table)) goto bad; pr_cont("R2:%016lx ", *table); @@ -153,7 +153,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address) table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* fallthrough */ case _ASCE_TYPE_REGION3: - table = table + ((address >> 31) & 0x7ff); + table += (address & _REGION3_INDEX) >> _REGION3_SHIFT; if (bad_address(table)) goto bad; pr_cont("R3:%016lx ", *table); @@ -162,7 +162,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address) table = (unsigned long 
*)(*table & _REGION_ENTRY_ORIGIN); /* fallthrough */ case _ASCE_TYPE_SEGMENT: - table = table + ((address >> 20) & 0x7ff); + table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; if (bad_address(table)) goto bad; pr_cont("S:%016lx ", *table); @@ -170,7 +170,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address) goto out; table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); } - table = table + ((address >> 12) & 0xff); + table += (address & _PAGE_INDEX) >> _PAGE_SHIFT; if (bad_address(table)) goto bad; pr_cont("P:%016lx ", *table); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 4fb3d3cdb370..9e1494e3d849 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -36,16 +36,16 @@ static struct gmap *gmap_alloc(unsigned long limit) unsigned long *table; unsigned long etype, atype; - if (limit < (1UL << 31)) { - limit = (1UL << 31) - 1; + if (limit < _REGION3_SIZE) { + limit = _REGION3_SIZE - 1; atype = _ASCE_TYPE_SEGMENT; etype = _SEGMENT_ENTRY_EMPTY; - } else if (limit < (1UL << 42)) { - limit = (1UL << 42) - 1; + } else if (limit < _REGION2_SIZE) { + limit = _REGION2_SIZE - 1; atype = _ASCE_TYPE_REGION3; etype = _REGION3_ENTRY_EMPTY; - } else if (limit < (1UL << 53)) { - limit = (1UL << 53) - 1; + } else if (limit < _REGION1_SIZE) { + limit = _REGION1_SIZE - 1; atype = _ASCE_TYPE_REGION2; etype = _REGION2_ENTRY_EMPTY; } else { @@ -65,7 +65,7 @@ static struct gmap *gmap_alloc(unsigned long limit) spin_lock_init(&gmap->guest_table_lock); spin_lock_init(&gmap->shadow_lock); atomic_set(&gmap->ref_count, 1); - page = alloc_pages(GFP_KERNEL, 2); + page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); if (!page) goto out_free; page->index = 0; @@ -186,7 +186,7 @@ static void gmap_free(struct gmap *gmap) gmap_flush_tlb(gmap); /* Free all segment & region tables. 
*/ list_for_each_entry_safe(page, next, &gmap->crst_list, lru) - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); gmap_radix_tree_free(&gmap->guest_to_host); gmap_radix_tree_free(&gmap->host_to_guest); @@ -306,7 +306,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, unsigned long *new; /* since we dont free the gmap table until gmap_free we can unlock */ - page = alloc_pages(GFP_KERNEL, 2); + page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; new = (unsigned long *) page_to_phys(page); @@ -321,7 +321,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, } spin_unlock(&gmap->guest_table_lock); if (page) - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); return 0; } @@ -546,30 +546,30 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) /* Create higher level tables in the gmap page table */ table = gmap->table; if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { - table += (gaddr >> 53) & 0x7ff; + table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT; if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, - gaddr & 0xffe0000000000000UL)) + gaddr & _REGION1_MASK)) return -ENOMEM; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); } if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { - table += (gaddr >> 42) & 0x7ff; + table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT; if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, - gaddr & 0xfffffc0000000000UL)) + gaddr & _REGION2_MASK)) return -ENOMEM; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); } if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { - table += (gaddr >> 31) & 0x7ff; + table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT; if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, - gaddr & 0xffffffff80000000UL)) + gaddr & _REGION3_MASK)) 
return -ENOMEM; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); } - table += (gaddr >> 20) & 0x7ff; + table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; /* Walk the parent mm page table */ mm = gmap->mm; pgd = pgd_offset(mm, vmaddr); @@ -771,7 +771,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, table = gmap->table; switch (gmap->asce & _ASCE_TYPE_MASK) { case _ASCE_TYPE_REGION1: - table += (gaddr >> 53) & 0x7ff; + table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT; if (level == 4) break; if (*table & _REGION_ENTRY_INVALID) @@ -779,7 +779,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* Fallthrough */ case _ASCE_TYPE_REGION2: - table += (gaddr >> 42) & 0x7ff; + table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT; if (level == 3) break; if (*table & _REGION_ENTRY_INVALID) @@ -787,7 +787,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* Fallthrough */ case _ASCE_TYPE_REGION3: - table += (gaddr >> 31) & 0x7ff; + table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT; if (level == 2) break; if (*table & _REGION_ENTRY_INVALID) @@ -795,13 +795,13 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* Fallthrough */ case _ASCE_TYPE_SEGMENT: - table += (gaddr >> 20) & 0x7ff; + table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; if (level == 1) break; if (*table & _REGION_ENTRY_INVALID) return NULL; table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); - table += (gaddr >> 12) & 0xff; + table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT; } return table; } @@ -1126,7 +1126,7 @@ static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr) table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */ if (!table || *table & _PAGE_INVALID) return; - gmap_call_notifier(sg, raddr, raddr + (1UL << 12) - 1); 
+ gmap_call_notifier(sg, raddr, raddr + _PAGE_SIZE - 1); ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table); } @@ -1144,7 +1144,7 @@ static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr, int i; BUG_ON(!gmap_is_shadow(sg)); - for (i = 0; i < 256; i++, raddr += 1UL << 12) + for (i = 0; i < _PAGE_ENTRIES; i++, raddr += _PAGE_SIZE) pgt[i] = _PAGE_INVALID; } @@ -1164,8 +1164,8 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */ if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN)) return; - gmap_call_notifier(sg, raddr, raddr + (1UL << 20) - 1); - sto = (unsigned long) (ste - ((raddr >> 20) & 0x7ff)); + gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1); + sto = (unsigned long) (ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT)); gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr); pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN); *ste = _SEGMENT_ENTRY_EMPTY; @@ -1193,7 +1193,7 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, BUG_ON(!gmap_is_shadow(sg)); asce = (unsigned long) sgt | _ASCE_TYPE_SEGMENT; - for (i = 0; i < 2048; i++, raddr += 1UL << 20) { + for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) { if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN)) continue; pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN); @@ -1222,8 +1222,8 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */ if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN)) return; - gmap_call_notifier(sg, raddr, raddr + (1UL << 31) - 1); - r3o = (unsigned long) (r3e - ((raddr >> 31) & 0x7ff)); + gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1); + r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT)); gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr); sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN); *r3e = _REGION3_ENTRY_EMPTY; @@ -1231,7 +1231,7 @@ static void 
gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) /* Free segment table */ page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT); list_del(&page->lru); - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); } /** @@ -1251,7 +1251,7 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, BUG_ON(!gmap_is_shadow(sg)); asce = (unsigned long) r3t | _ASCE_TYPE_REGION3; - for (i = 0; i < 2048; i++, raddr += 1UL << 31) { + for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) { if (!(r3t[i] & _REGION_ENTRY_ORIGIN)) continue; sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN); @@ -1260,7 +1260,7 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, /* Free segment table */ page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT); list_del(&page->lru); - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1280,8 +1280,8 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */ if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN)) return; - gmap_call_notifier(sg, raddr, raddr + (1UL << 42) - 1); - r2o = (unsigned long) (r2e - ((raddr >> 42) & 0x7ff)); + gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1); + r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT)); gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr); r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN); *r2e = _REGION2_ENTRY_EMPTY; @@ -1289,7 +1289,7 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) /* Free region 3 table */ page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT); list_del(&page->lru); - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); } /** @@ -1309,7 +1309,7 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, BUG_ON(!gmap_is_shadow(sg)); asce = (unsigned long) r2t | _ASCE_TYPE_REGION2; - for (i = 0; i < 2048; i++, raddr += 1UL << 42) { + for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) { if 
(!(r2t[i] & _REGION_ENTRY_ORIGIN)) continue; r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN); @@ -1318,7 +1318,7 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, /* Free region 3 table */ page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT); list_del(&page->lru); - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1338,8 +1338,8 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */ if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN)) return; - gmap_call_notifier(sg, raddr, raddr + (1UL << 53) - 1); - r1o = (unsigned long) (r1e - ((raddr >> 53) & 0x7ff)); + gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1); + r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT)); gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr); r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN); *r1e = _REGION1_ENTRY_EMPTY; @@ -1347,7 +1347,7 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) /* Free region 2 table */ page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT); list_del(&page->lru); - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); } /** @@ -1367,7 +1367,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, BUG_ON(!gmap_is_shadow(sg)); asce = (unsigned long) r1t | _ASCE_TYPE_REGION1; - for (i = 0; i < 2048; i++, raddr += 1UL << 53) { + for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) { if (!(r1t[i] & _REGION_ENTRY_ORIGIN)) continue; r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN); @@ -1378,7 +1378,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, /* Free region 2 table */ page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT); list_del(&page->lru); - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1535,7 +1535,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, /* protect after insertion, so it will get properly invalidated */ 
down_read(&parent->mm->mmap_sem); rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN, - ((asce & _ASCE_TABLE_LENGTH) + 1) * 4096, + ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE, PROT_READ, PGSTE_VSIE_BIT); up_read(&parent->mm->mmap_sem); spin_lock(&parent->shadow_lock); @@ -1578,7 +1578,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, BUG_ON(!gmap_is_shadow(sg)); /* Allocate a shadow region second table */ - page = alloc_pages(GFP_KERNEL, 2); + page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; page->index = r2t & _REGION_ENTRY_ORIGIN; @@ -1614,10 +1614,10 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, } spin_unlock(&sg->guest_table_lock); /* Make r2t read-only in parent gmap page table */ - raddr = (saddr & 0xffe0000000000000UL) | _SHADOW_RMAP_REGION1; + raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1; origin = r2t & _REGION_ENTRY_ORIGIN; - offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * 4096; - len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset; + offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE; + len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset; rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); spin_lock(&sg->guest_table_lock); if (!rc) { @@ -1634,7 +1634,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, return rc; out_free: spin_unlock(&sg->guest_table_lock); - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); return rc; } EXPORT_SYMBOL_GPL(gmap_shadow_r2t); @@ -1662,7 +1662,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, BUG_ON(!gmap_is_shadow(sg)); /* Allocate a shadow region second table */ - page = alloc_pages(GFP_KERNEL, 2); + page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; page->index = r3t & _REGION_ENTRY_ORIGIN; @@ -1697,10 +1697,10 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned 
long r3t, } spin_unlock(&sg->guest_table_lock); /* Make r3t read-only in parent gmap page table */ - raddr = (saddr & 0xfffffc0000000000UL) | _SHADOW_RMAP_REGION2; + raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2; origin = r3t & _REGION_ENTRY_ORIGIN; - offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * 4096; - len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset; + offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE; + len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset; rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); spin_lock(&sg->guest_table_lock); if (!rc) { @@ -1717,7 +1717,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, return rc; out_free: spin_unlock(&sg->guest_table_lock); - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); return rc; } EXPORT_SYMBOL_GPL(gmap_shadow_r3t); @@ -1745,7 +1745,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE)); /* Allocate a shadow segment table */ - page = alloc_pages(GFP_KERNEL, 2); + page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; page->index = sgt & _REGION_ENTRY_ORIGIN; @@ -1781,10 +1781,10 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, } spin_unlock(&sg->guest_table_lock); /* Make sgt read-only in parent gmap page table */ - raddr = (saddr & 0xffffffff80000000UL) | _SHADOW_RMAP_REGION3; + raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3; origin = sgt & _REGION_ENTRY_ORIGIN; - offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * 4096; - len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset; + offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE; + len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset; rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); spin_lock(&sg->guest_table_lock); if (!rc) { @@ -1801,7 +1801,7 @@ int gmap_shadow_sgt(struct gmap *sg, 
unsigned long saddr, unsigned long sgt, return rc; out_free: spin_unlock(&sg->guest_table_lock); - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); return rc; } EXPORT_SYMBOL_GPL(gmap_shadow_sgt); @@ -1902,7 +1902,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, } spin_unlock(&sg->guest_table_lock); /* Make pgt read-only in parent gmap page table (not the pgste) */ - raddr = (saddr & 0xfffffffffff00000UL) | _SHADOW_RMAP_SEGMENT; + raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT; origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK; rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE, PROT_READ); spin_lock(&sg->guest_table_lock); @@ -2021,7 +2021,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr, } /* Check for top level table */ start = sg->orig_asce & _ASCE_ORIGIN; - end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * 4096; + end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE; if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start && gaddr < end) { /* The complete shadow table has to go */ @@ -2032,7 +2032,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr, return; } /* Remove the page table tree from on specific entry */ - head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> 12); + head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT); gmap_for_each_rmap_safe(rmap, rnext, head) { bits = rmap->raddr & _SHADOW_RMAP_MASK; raddr = rmap->raddr ^ bits; @@ -2076,7 +2076,7 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, struct gmap *gmap, *sg, *next; offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); - offset = offset * (4096 / sizeof(pte_t)); + offset = offset * (PAGE_SIZE / sizeof(pte_t)); rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); @@ -2121,6 +2121,37 @@ static inline void thp_split_mm(struct mm_struct *mm) } /* + * Remove all empty zero 
pages from the mapping for lazy refaulting + * - This must be called after mm->context.has_pgste is set, to avoid + * future creation of zero pages + * - This must be called after THP was enabled + */ +static int __zap_zero_pages(pmd_t *pmd, unsigned long start, + unsigned long end, struct mm_walk *walk) +{ + unsigned long addr; + + for (addr = start; addr != end; addr += PAGE_SIZE) { + pte_t *ptep; + spinlock_t *ptl; + + ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (is_zero_pfn(pte_pfn(*ptep))) + ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID)); + pte_unmap_unlock(ptep, ptl); + } + return 0; +} + +static inline void zap_zero_pages(struct mm_struct *mm) +{ + struct mm_walk walk = { .pmd_entry = __zap_zero_pages }; + + walk.mm = mm; + walk_page_range(0, TASK_SIZE, &walk); +} + +/* * switch on pgstes for its userspace process (for kvm) */ int s390_enable_sie(void) @@ -2137,6 +2168,7 @@ int s390_enable_sie(void) mm->context.has_pgste = 1; /* split thp mappings and disable thp for future mappings */ thp_split_mm(mm); + zap_zero_pages(mm); up_write(&mm->mmap_sem); return 0; } @@ -2149,13 +2181,6 @@ EXPORT_SYMBOL_GPL(s390_enable_sie); static int __s390_enable_skey(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) { - /* - * Remove all zero page mappings, - * after establishing a policy to forbid zero page mappings - * following faults for that page will get fresh anonymous pages - */ - if (is_zero_pfn(pte_pfn(*pte))) - ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID)); /* Clear storage key */ ptep_zap_key(walk->mm, addr, pte); return 0; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 8111694ce55a..3b567838b905 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -26,6 +26,7 @@ #include <linux/poison.h> #include <linux/initrd.h> #include <linux/export.h> +#include <linux/cma.h> #include <linux/gfp.h> #include <linux/memblock.h> #include <asm/processor.h> @@ -84,7 +85,7 @@ void __init 
paging_init(void) psw_t psw; init_mm.pgd = swapper_pg_dir; - if (VMALLOC_END > (1UL << 42)) { + if (VMALLOC_END > _REGION2_SIZE) { asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; pgd_type = _REGION2_ENTRY_EMPTY; } else { @@ -93,8 +94,7 @@ void __init paging_init(void) } init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; S390_lowcore.kernel_asce = init_mm.context.asce; - clear_table((unsigned long *) init_mm.pgd, pgd_type, - sizeof(unsigned long)*2048); + crst_table_init((unsigned long *) init_mm.pgd, pgd_type); vmem_map_init(); /* enable virtual mapping in kernel mode */ @@ -137,6 +137,8 @@ void __init mem_init(void) free_all_bootmem(); setup_zero_pages(); /* Setup zeroed pages. */ + cmma_init_nodat(); + mem_init_print_info(NULL); } @@ -166,6 +168,58 @@ unsigned long memory_block_size_bytes(void) } #ifdef CONFIG_MEMORY_HOTPLUG + +#ifdef CONFIG_CMA + +/* Prevent memory blocks which contain cma regions from going offline */ + +struct s390_cma_mem_data { + unsigned long start; + unsigned long end; +}; + +static int s390_cma_check_range(struct cma *cma, void *data) +{ + struct s390_cma_mem_data *mem_data; + unsigned long start, end; + + mem_data = data; + start = cma_get_base(cma); + end = start + cma_get_size(cma); + if (end < mem_data->start) + return 0; + if (start >= mem_data->end) + return 0; + return -EBUSY; +} + +static int s390_cma_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct s390_cma_mem_data mem_data; + struct memory_notify *arg; + int rc = 0; + + arg = data; + mem_data.start = arg->start_pfn << PAGE_SHIFT; + mem_data.end = mem_data.start + (arg->nr_pages << PAGE_SHIFT); + if (action == MEM_GOING_OFFLINE) + rc = cma_for_each_area(s390_cma_check_range, &mem_data); + return notifier_from_errno(rc); +} + +static struct notifier_block s390_cma_mem_nb = { + .notifier_call = s390_cma_mem_notifier, +}; + +static int __init s390_cma_mem_init(void) +{ + return register_memory_notifier(&s390_cma_mem_nb); 
+} +device_initcall(s390_cma_mem_init); + +#endif /* CONFIG_CMA */ + int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) { unsigned long start_pfn = PFN_DOWN(start); diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 69a7b01ae746..07fa7b8ae233 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -10,9 +10,10 @@ #include <linux/errno.h> #include <linux/types.h> #include <linux/mm.h> +#include <linux/memblock.h> #include <linux/gfp.h> #include <linux/init.h> - +#include <asm/facility.h> #include <asm/page-states.h> static int cmma_flag = 1; @@ -36,14 +37,16 @@ __setup("cmma=", cmma); static inline int cmma_test_essa(void) { register unsigned long tmp asm("0") = 0; - register int rc asm("1") = -EOPNOTSUPP; + register int rc asm("1"); + /* test ESSA_GET_STATE */ asm volatile( - " .insn rrf,0xb9ab0000,%1,%1,0,0\n" + " .insn rrf,0xb9ab0000,%1,%1,%2,0\n" "0: la %0,0\n" "1:\n" EX_TABLE(0b,1b) - : "+&d" (rc), "+&d" (tmp)); + : "=&d" (rc), "+&d" (tmp) + : "i" (ESSA_GET_STATE), "0" (-EOPNOTSUPP)); return rc; } @@ -51,11 +54,26 @@ void __init cmma_init(void) { if (!cmma_flag) return; - if (cmma_test_essa()) + if (cmma_test_essa()) { cmma_flag = 0; + return; + } + if (test_facility(147)) + cmma_flag = 2; } -static inline void set_page_unstable(struct page *page, int order) +static inline unsigned char get_page_state(struct page *page) +{ + unsigned char state; + + asm volatile(" .insn rrf,0xb9ab0000,%0,%1,%2,0" + : "=&d" (state) + : "a" (page_to_phys(page)), + "i" (ESSA_GET_STATE)); + return state & 0x3f; +} + +static inline void set_page_unused(struct page *page, int order) { int i, rc; @@ -66,14 +84,18 @@ static inline void set_page_unstable(struct page *page, int order) "i" (ESSA_SET_UNUSED)); } -void arch_free_page(struct page *page, int order) +static inline void set_page_stable_dat(struct page *page, int order) { - if (!cmma_flag) - return; - set_page_unstable(page, order); + int i, rc; + + for (i = 0; i < 
(1 << order); i++) + asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" + : "=&d" (rc) + : "a" (page_to_phys(page + i)), + "i" (ESSA_SET_STABLE)); } -static inline void set_page_stable(struct page *page, int order) +static inline void set_page_stable_nodat(struct page *page, int order) { int i, rc; @@ -81,14 +103,154 @@ static inline void set_page_stable(struct page *page, int order) asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" : "=&d" (rc) : "a" (page_to_phys(page + i)), - "i" (ESSA_SET_STABLE)); + "i" (ESSA_SET_STABLE_NODAT)); +} + +static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end) +{ + unsigned long next; + struct page *page; + pmd_t *pmd; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + if (pmd_none(*pmd) || pmd_large(*pmd)) + continue; + page = virt_to_page(pmd_val(*pmd)); + set_bit(PG_arch_1, &page->flags); + } while (pmd++, addr = next, addr != end); +} + +static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end) +{ + unsigned long next; + struct page *page; + pud_t *pud; + int i; + + pud = pud_offset(p4d, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none(*pud) || pud_large(*pud)) + continue; + if (!pud_folded(*pud)) { + page = virt_to_page(pud_val(*pud)); + for (i = 0; i < 3; i++) + set_bit(PG_arch_1, &page[i].flags); + } + mark_kernel_pmd(pud, addr, next); + } while (pud++, addr = next, addr != end); +} + +static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end) +{ + unsigned long next; + struct page *page; + p4d_t *p4d; + int i; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none(*p4d)) + continue; + if (!p4d_folded(*p4d)) { + page = virt_to_page(p4d_val(*p4d)); + for (i = 0; i < 3; i++) + set_bit(PG_arch_1, &page[i].flags); + } + mark_kernel_pud(p4d, addr, next); + } while (p4d++, addr = next, addr != end); +} + +static void mark_kernel_pgd(void) +{ + unsigned long addr, next; + struct page *page; + pgd_t 
*pgd; + int i; + + addr = 0; + pgd = pgd_offset_k(addr); + do { + next = pgd_addr_end(addr, MODULES_END); + if (pgd_none(*pgd)) + continue; + if (!pgd_folded(*pgd)) { + page = virt_to_page(pgd_val(*pgd)); + for (i = 0; i < 3; i++) + set_bit(PG_arch_1, &page[i].flags); + } + mark_kernel_p4d(pgd, addr, next); + } while (pgd++, addr = next, addr != MODULES_END); +} + +void __init cmma_init_nodat(void) +{ + struct memblock_region *reg; + struct page *page; + unsigned long start, end, ix; + + if (cmma_flag < 2) + return; + /* Mark pages used in kernel page tables */ + mark_kernel_pgd(); + + /* Set all kernel pages not used for page tables to stable/no-dat */ + for_each_memblock(memory, reg) { + start = memblock_region_memory_base_pfn(reg); + end = memblock_region_memory_end_pfn(reg); + page = pfn_to_page(start); + for (ix = start; ix < end; ix++, page++) { + if (__test_and_clear_bit(PG_arch_1, &page->flags)) + continue; /* skip page table pages */ + if (!list_empty(&page->lru)) + continue; /* skip free pages */ + set_page_stable_nodat(page, 0); + } + } +} + +void arch_free_page(struct page *page, int order) +{ + if (!cmma_flag) + return; + set_page_unused(page, order); } void arch_alloc_page(struct page *page, int order) { if (!cmma_flag) return; - set_page_stable(page, order); + if (cmma_flag < 2) + set_page_stable_dat(page, order); + else + set_page_stable_nodat(page, order); +} + +void arch_set_page_dat(struct page *page, int order) +{ + if (!cmma_flag) + return; + set_page_stable_dat(page, order); +} + +void arch_set_page_nodat(struct page *page, int order) +{ + if (cmma_flag < 2) + return; + set_page_stable_nodat(page, order); +} + +int arch_test_page_nodat(struct page *page) +{ + unsigned char state; + + if (cmma_flag < 2) + return 0; + state = get_page_state(page); + return !!(state & 0x20); } void arch_set_page_states(int make_stable) @@ -108,9 +270,9 @@ void arch_set_page_states(int make_stable) list_for_each(l, &zone->free_area[order].free_list[t]) { page = 
list_entry(l, struct page, lru); if (make_stable) - set_page_stable(page, order); + set_page_stable_dat(page, 0); else - set_page_unstable(page, order); + set_page_unused(page, order); } } spin_unlock_irqrestore(&zone->lock, flags); diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 180481589246..552f898dfa74 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -7,6 +7,7 @@ #include <asm/cacheflush.h> #include <asm/facility.h> #include <asm/pgtable.h> +#include <asm/pgalloc.h> #include <asm/page.h> #include <asm/set_memory.h> @@ -191,7 +192,7 @@ static int split_pud_page(pud_t *pudp, unsigned long addr) pud_t new; int i, ro, nx; - pm_dir = vmem_pmd_alloc(); + pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); if (!pm_dir) return -ENOMEM; pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT; @@ -328,7 +329,7 @@ static void ipte_range(pte_t *pte, unsigned long address, int nr) return; } for (i = 0; i < nr; i++) { - __ptep_ipte(address, pte, IPTE_GLOBAL); + __ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL); address += PAGE_SIZE; pte++; } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 18918e394ce4..c5b74dd61197 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -57,6 +57,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm) if (!page) return NULL; + arch_set_page_dat(page, 2); return (unsigned long *) page_to_phys(page); } @@ -82,7 +83,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) int rc, notify; /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */ - BUG_ON(mm->context.asce_limit < (1UL << 42)); + BUG_ON(mm->context.asce_limit < _REGION2_SIZE); if (end >= TASK_SIZE_MAX) return -ENOMEM; rc = 0; @@ -95,11 +96,11 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) } spin_lock_bh(&mm->page_table_lock); pgd = (unsigned long *) mm->pgd; - if (mm->context.asce_limit == (1UL << 42)) { + if (mm->context.asce_limit == _REGION2_SIZE) { crst_table_init(table, 
_REGION2_ENTRY_EMPTY); p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd); mm->pgd = (pgd_t *) table; - mm->context.asce_limit = 1UL << 53; + mm->context.asce_limit = _REGION1_SIZE; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_REGION2; } else { @@ -123,7 +124,7 @@ void crst_table_downgrade(struct mm_struct *mm) pgd_t *pgd; /* downgrade should only happen from 3 to 2 levels (compat only) */ - BUG_ON(mm->context.asce_limit != (1UL << 42)); + BUG_ON(mm->context.asce_limit != _REGION2_SIZE); if (current->active_mm == mm) { clear_user_asce(); @@ -132,7 +133,7 @@ void crst_table_downgrade(struct mm_struct *mm) pgd = mm->pgd; mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); - mm->context.asce_limit = 1UL << 31; + mm->context.asce_limit = _REGION3_SIZE; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; crst_table_free(mm, (unsigned long *) pgd); @@ -214,6 +215,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) __free_page(page); return NULL; } + arch_set_page_dat(page, 0); /* Initialize page table */ table = (unsigned long *) page_to_phys(page); if (mm_alloc_pgste(mm)) { diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 4a1f7366b17a..4198a71b8fdd 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -25,8 +25,49 @@ #include <asm/mmu_context.h> #include <asm/page-states.h> +static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, int nodat) +{ + unsigned long opt, asce; + + if (MACHINE_HAS_TLB_GUEST) { + opt = 0; + asce = READ_ONCE(mm->context.gmap_asce); + if (asce == 0UL || nodat) + opt |= IPTE_NODAT; + if (asce != -1UL) { + asce = asce ? 
: mm->context.asce; + opt |= IPTE_GUEST_ASCE; + } + __ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL); + } else { + __ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL); + } +} + +static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, int nodat) +{ + unsigned long opt, asce; + + if (MACHINE_HAS_TLB_GUEST) { + opt = 0; + asce = READ_ONCE(mm->context.gmap_asce); + if (asce == 0UL || nodat) + opt |= IPTE_NODAT; + if (asce != -1UL) { + asce = asce ? : mm->context.asce; + opt |= IPTE_GUEST_ASCE; + } + __ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL); + } else { + __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); + } +} + static inline pte_t ptep_flush_direct(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, + int nodat) { pte_t old; @@ -36,15 +77,16 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm, atomic_inc(&mm->context.flush_count); if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __ptep_ipte(addr, ptep, IPTE_LOCAL); + ptep_ipte_local(mm, addr, ptep, nodat); else - __ptep_ipte(addr, ptep, IPTE_GLOBAL); + ptep_ipte_global(mm, addr, ptep, nodat); atomic_dec(&mm->context.flush_count); return old; } static inline pte_t ptep_flush_lazy(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, + int nodat) { pte_t old; @@ -57,7 +99,7 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm, pte_val(*ptep) |= _PAGE_INVALID; mm->context.flush_mm = 1; } else - __ptep_ipte(addr, ptep, IPTE_GLOBAL); + ptep_ipte_global(mm, addr, ptep, nodat); atomic_dec(&mm->context.flush_count); return old; } @@ -229,10 +271,12 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, { pgste_t pgste; pte_t old; + int nodat; preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); - old = ptep_flush_direct(mm, addr, ptep); + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); + old = ptep_flush_direct(mm, addr, ptep, nodat); old = 
ptep_xchg_commit(mm, addr, ptep, pgste, old, new); preempt_enable(); return old; @@ -244,10 +288,12 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr, { pgste_t pgste; pte_t old; + int nodat; preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); - old = ptep_flush_lazy(mm, addr, ptep); + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); + old = ptep_flush_lazy(mm, addr, ptep, nodat); old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); preempt_enable(); return old; @@ -259,10 +305,12 @@ pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, { pgste_t pgste; pte_t old; + int nodat; preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); - old = ptep_flush_lazy(mm, addr, ptep); + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); + old = ptep_flush_lazy(mm, addr, ptep, nodat); if (mm_has_pgste(mm)) { pgste = pgste_update_all(old, pgste, mm); pgste_set(ptep, pgste); @@ -290,6 +338,28 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(ptep_modify_prot_commit); +static inline void pmdp_idte_local(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, + mm->context.asce, IDTE_LOCAL); + else + __pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL); +} + +static inline void pmdp_idte_global(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, + mm->context.asce, IDTE_GLOBAL); + else if (MACHINE_HAS_IDTE) + __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL); + else + __pmdp_csp(pmdp); +} + static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { @@ -298,16 +368,12 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, old = *pmdp; if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) return old; - if (!MACHINE_HAS_IDTE) { - __pmdp_csp(pmdp); - return old; - } atomic_inc(&mm->context.flush_count); if 
(MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __pmdp_idte(addr, pmdp, IDTE_LOCAL); + pmdp_idte_local(mm, addr, pmdp); else - __pmdp_idte(addr, pmdp, IDTE_GLOBAL); + pmdp_idte_global(mm, addr, pmdp); atomic_dec(&mm->context.flush_count); return old; } @@ -325,10 +391,9 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, cpumask_of(smp_processor_id()))) { pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; mm->context.flush_mm = 1; - } else if (MACHINE_HAS_IDTE) - __pmdp_idte(addr, pmdp, IDTE_GLOBAL); - else - __pmdp_csp(pmdp); + } else { + pmdp_idte_global(mm, addr, pmdp); + } atomic_dec(&mm->context.flush_count); return old; } @@ -359,28 +424,46 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(pmdp_xchg_lazy); -static inline pud_t pudp_flush_direct(struct mm_struct *mm, - unsigned long addr, pud_t *pudp) +static inline void pudp_idte_local(struct mm_struct *mm, + unsigned long addr, pud_t *pudp) { - pud_t old; + if (MACHINE_HAS_TLB_GUEST) + __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, + mm->context.asce, IDTE_LOCAL); + else + __pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL); +} - old = *pudp; - if (pud_val(old) & _REGION_ENTRY_INVALID) - return old; - if (!MACHINE_HAS_IDTE) { +static inline void pudp_idte_global(struct mm_struct *mm, + unsigned long addr, pud_t *pudp) +{ + if (MACHINE_HAS_TLB_GUEST) + __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, + mm->context.asce, IDTE_GLOBAL); + else if (MACHINE_HAS_IDTE) + __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL); + else /* * Invalid bit position is the same for pmd and pud, so we can * re-use _pmd_csp() here */ __pmdp_csp((pmd_t *) pudp); +} + +static inline pud_t pudp_flush_direct(struct mm_struct *mm, + unsigned long addr, pud_t *pudp) +{ + pud_t old; + + old = *pudp; + if (pud_val(old) & _REGION_ENTRY_INVALID) return old; - } atomic_inc(&mm->context.flush_count); if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), 
cpumask_of(smp_processor_id()))) - __pudp_idte(addr, pudp, IDTE_LOCAL); + pudp_idte_local(mm, addr, pudp); else - __pudp_idte(addr, pudp, IDTE_GLOBAL); + pudp_idte_global(mm, addr, pudp); atomic_dec(&mm->context.flush_count); return old; } @@ -482,7 +565,7 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr, { pte_t entry; pgste_t pgste; - int pte_i, pte_p; + int pte_i, pte_p, nodat; pgste = pgste_get_lock(ptep); entry = *ptep; @@ -495,13 +578,14 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr, return -EAGAIN; } /* Change access rights and set pgste bit */ + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); if (prot == PROT_NONE && !pte_i) { - ptep_flush_direct(mm, addr, ptep); + ptep_flush_direct(mm, addr, ptep, nodat); pgste = pgste_update_all(entry, pgste, mm); pte_val(entry) |= _PAGE_INVALID; } if (prot == PROT_READ && !pte_p) { - ptep_flush_direct(mm, addr, ptep); + ptep_flush_direct(mm, addr, ptep, nodat); pte_val(entry) &= ~_PAGE_INVALID; pte_val(entry) |= _PAGE_PROTECT; } @@ -541,10 +625,12 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep) { pgste_t pgste; + int nodat; pgste = pgste_get_lock(ptep); /* notifier is called by the caller */ - ptep_flush_direct(mm, saddr, ptep); + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); + ptep_flush_direct(mm, saddr, ptep, nodat); /* don't touch the storage key - it belongs to parent pgste */ pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID)); pgste_set_unlock(ptep, pgste); @@ -617,6 +703,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) pte_t *ptep; pte_t pte; bool dirty; + int nodat; pgd = pgd_offset(mm, addr); p4d = p4d_alloc(mm, pgd, addr); @@ -645,7 +732,8 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) pte = *ptep; if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { pgste = pgste_pte_notify(mm, addr, ptep, pgste); - __ptep_ipte(addr, 
ptep, IPTE_GLOBAL); + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); + ptep_ipte_global(mm, addr, ptep, nodat); if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) pte_val(pte) |= _PAGE_PROTECT; else diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index d8398962a723..c0af0d7b6e5f 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -38,37 +38,14 @@ static void __ref *vmem_alloc_pages(unsigned int order) return (void *) memblock_alloc(size, size); } -static inline p4d_t *vmem_p4d_alloc(void) +void *vmem_crst_alloc(unsigned long val) { - p4d_t *p4d = NULL; + unsigned long *table; - p4d = vmem_alloc_pages(2); - if (!p4d) - return NULL; - clear_table((unsigned long *) p4d, _REGION2_ENTRY_EMPTY, PAGE_SIZE * 4); - return p4d; -} - -static inline pud_t *vmem_pud_alloc(void) -{ - pud_t *pud = NULL; - - pud = vmem_alloc_pages(2); - if (!pud) - return NULL; - clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4); - return pud; -} - -pmd_t *vmem_pmd_alloc(void) -{ - pmd_t *pmd = NULL; - - pmd = vmem_alloc_pages(2); - if (!pmd) - return NULL; - clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4); - return pmd; + table = vmem_alloc_pages(CRST_ALLOC_ORDER); + if (table) + crst_table_init(table, val); + return table; } pte_t __ref *vmem_pte_alloc(void) @@ -114,14 +91,14 @@ static int vmem_add_mem(unsigned long start, unsigned long size) while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { - p4_dir = vmem_p4d_alloc(); + p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); if (!p4_dir) goto out; pgd_populate(&init_mm, pg_dir, p4_dir); } p4_dir = p4d_offset(pg_dir, address); if (p4d_none(*p4_dir)) { - pu_dir = vmem_pud_alloc(); + pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); if (!pu_dir) goto out; p4d_populate(&init_mm, p4_dir, pu_dir); @@ -136,7 +113,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size) continue; } if (pud_none(*pu_dir)) { - pm_dir = vmem_pmd_alloc(); + pm_dir = 
vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); if (!pm_dir) goto out; pud_populate(&init_mm, pu_dir, pm_dir); @@ -253,7 +230,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) for (address = start; address < end;) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { - p4_dir = vmem_p4d_alloc(); + p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); if (!p4_dir) goto out; pgd_populate(&init_mm, pg_dir, p4_dir); @@ -261,7 +238,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) p4_dir = p4d_offset(pg_dir, address); if (p4d_none(*p4_dir)) { - pu_dir = vmem_pud_alloc(); + pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); if (!pu_dir) goto out; p4d_populate(&init_mm, p4_dir, pu_dir); @@ -269,7 +246,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) pu_dir = pud_offset(p4_dir, address); if (pud_none(*pu_dir)) { - pm_dir = vmem_pmd_alloc(); + pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); if (!pm_dir) goto out; pud_populate(&init_mm, pu_dir, pm_dir); diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index bd534b4d40e3..0ae3936e266f 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -24,6 +24,14 @@ bool zpci_unique_uid; +static void update_uid_checking(bool new) +{ + if (zpci_unique_uid != new) + zpci_dbg(1, "uid checking:%d\n", new); + + zpci_unique_uid = new; +} + static inline void zpci_err_clp(unsigned int rsp, int rc) { struct { @@ -319,7 +327,7 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, void *data, goto out; } - zpci_unique_uid = rrb->response.uid_checking; + update_uid_checking(rrb->response.uid_checking); WARN_ON_ONCE(rrb->response.entry_size != sizeof(struct clp_fh_list_entry)); diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index 025ea20fc4b4..29d72bf8ed2b 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -41,7 +41,7 @@ static struct facility_def 
facility_defs[] = { 27, /* mvcos */ 32, /* compare and swap and store */ 33, /* compare and swap and store 2 */ - 34, /* general extension facility */ + 34, /* general instructions extension */ 35, /* execute extensions */ #endif #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES @@ -54,6 +54,9 @@ static struct facility_def facility_defs[] = { #ifdef CONFIG_HAVE_MARCH_Z13_FEATURES 53, /* load-and-zero-rightmost-byte, etc. */ #endif +#ifdef CONFIG_HAVE_MARCH_Z14_FEATURES + 58, /* miscellaneous-instruction-extension 2 */ +#endif -1 /* END */ } }, diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 670ac0a4ef49..9c97ad1ee121 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -801,11 +801,12 @@ static void dasd_profile_end(struct dasd_block *block, struct dasd_ccw_req *cqr, struct request *req) { - long strtime, irqtime, endtime, tottime; /* in microseconds */ - long tottimeps, sectors; + unsigned long strtime, irqtime, endtime, tottime; + unsigned long tottimeps, sectors; struct dasd_device *device; int sectors_ind, tottime_ind, tottimeps_ind, strtime_ind; int irqtime_ind, irqtimeps_ind, endtime_ind; + struct dasd_profile_info *data; device = cqr->startdev; if (!(dasd_global_profile_level || @@ -835,6 +836,11 @@ static void dasd_profile_end(struct dasd_block *block, spin_lock(&dasd_global_profile.lock); if (dasd_global_profile.data) { + data = dasd_global_profile.data; + data->dasd_sum_times += tottime; + data->dasd_sum_time_str += strtime; + data->dasd_sum_time_irq += irqtime; + data->dasd_sum_time_end += endtime; dasd_profile_end_add_data(dasd_global_profile.data, cqr->startdev != block->base, cqr->cpmode == 1, @@ -847,7 +853,12 @@ static void dasd_profile_end(struct dasd_block *block, spin_unlock(&dasd_global_profile.lock); spin_lock(&block->profile.lock); - if (block->profile.data) + if (block->profile.data) { + data = block->profile.data; + data->dasd_sum_times += tottime; + data->dasd_sum_time_str += strtime; + 
data->dasd_sum_time_irq += irqtime; + data->dasd_sum_time_end += endtime; dasd_profile_end_add_data(block->profile.data, cqr->startdev != block->base, cqr->cpmode == 1, @@ -856,10 +867,16 @@ static void dasd_profile_end(struct dasd_block *block, tottimeps_ind, strtime_ind, irqtime_ind, irqtimeps_ind, endtime_ind); + } spin_unlock(&block->profile.lock); spin_lock(&device->profile.lock); - if (device->profile.data) + if (device->profile.data) { + data = device->profile.data; + data->dasd_sum_times += tottime; + data->dasd_sum_time_str += strtime; + data->dasd_sum_time_irq += irqtime; + data->dasd_sum_time_end += endtime; dasd_profile_end_add_data(device->profile.data, cqr->startdev != block->base, cqr->cpmode == 1, @@ -868,6 +885,7 @@ static void dasd_profile_end(struct dasd_block *block, tottimeps_ind, strtime_ind, irqtime_ind, irqtimeps_ind, endtime_ind); + } spin_unlock(&device->profile.lock); } @@ -989,6 +1007,14 @@ static void dasd_stats_seq_print(struct seq_file *m, seq_printf(m, "total_sectors %u\n", data->dasd_io_sects); seq_printf(m, "total_pav %u\n", data->dasd_io_alias); seq_printf(m, "total_hpf %u\n", data->dasd_io_tpm); + seq_printf(m, "avg_total %lu\n", data->dasd_io_reqs ? + data->dasd_sum_times / data->dasd_io_reqs : 0UL); + seq_printf(m, "avg_build_to_ssch %lu\n", data->dasd_io_reqs ? + data->dasd_sum_time_str / data->dasd_io_reqs : 0UL); + seq_printf(m, "avg_ssch_to_irq %lu\n", data->dasd_io_reqs ? + data->dasd_sum_time_irq / data->dasd_io_reqs : 0UL); + seq_printf(m, "avg_irq_to_end %lu\n", data->dasd_io_reqs ? 
+ data->dasd_sum_time_end / data->dasd_io_reqs : 0UL); seq_puts(m, "histogram_sectors "); dasd_stats_array(m, data->dasd_io_secs); seq_puts(m, "histogram_io_times "); @@ -1639,7 +1665,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, { struct dasd_ccw_req *cqr, *next; struct dasd_device *device; - unsigned long long now; + unsigned long now; int nrf_suppressed = 0; int fp_suppressed = 0; u8 *sense = NULL; @@ -3152,7 +3178,9 @@ static int dasd_alloc_queue(struct dasd_block *block) */ static void dasd_setup_queue(struct dasd_block *block) { + unsigned int logical_block_size = block->bp_block; struct request_queue *q = block->request_queue; + unsigned int max_bytes, max_discard_sectors; int max; if (block->base->features & DASD_FEATURE_USERAW) { @@ -3169,7 +3197,7 @@ static void dasd_setup_queue(struct dasd_block *block) } queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); q->limits.max_dev_sectors = max; - blk_queue_logical_block_size(q, block->bp_block); + blk_queue_logical_block_size(q, logical_block_size); blk_queue_max_hw_sectors(q, max); blk_queue_max_segments(q, USHRT_MAX); /* with page sized segments we can translate each segement into @@ -3177,6 +3205,21 @@ static void dasd_setup_queue(struct dasd_block *block) */ blk_queue_max_segment_size(q, PAGE_SIZE); blk_queue_segment_boundary(q, PAGE_SIZE - 1); + + /* Only activate blocklayer discard support for devices that support it */ + if (block->base->features & DASD_FEATURE_DISCARD) { + q->limits.discard_granularity = logical_block_size; + q->limits.discard_alignment = PAGE_SIZE; + + /* Calculate max_discard_sectors and make it PAGE aligned */ + max_bytes = USHRT_MAX * logical_block_size; + max_bytes = ALIGN(max_bytes, PAGE_SIZE) - PAGE_SIZE; + max_discard_sectors = max_bytes / logical_block_size; + + blk_queue_max_discard_sectors(q, max_discard_sectors); + blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + } } /* diff --git 
a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index 107cd3361e29..e448a0fc0c09 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -2231,7 +2231,7 @@ static void dasd_3990_erp_account_error(struct dasd_ccw_req *erp) struct dasd_device *device = erp->startdev; __u8 lpum = erp->refers->irb.esw.esw1.lpum; int pos = pathmask_to_pos(lpum); - unsigned long long clk; + unsigned long clk; if (!device->path_thrhld) return; diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 779dce069cc5..e38042ce94e6 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -1634,7 +1634,7 @@ static struct attribute * dasd_attrs[] = { NULL, }; -static struct attribute_group dasd_attr_group = { +static const struct attribute_group dasd_attr_group = { .attrs = dasd_attrs, }; @@ -1676,6 +1676,7 @@ dasd_set_feature(struct ccw_device *cdev, int feature, int flag) spin_unlock(&dasd_devmap_lock); return 0; } +EXPORT_SYMBOL(dasd_set_feature); int diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index 5667146c6a0a..98fb28e49d2c 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -235,7 +235,7 @@ static void dasd_ext_handler(struct ext_code ext_code, { struct dasd_ccw_req *cqr, *next; struct dasd_device *device; - unsigned long long expires; + unsigned long expires; unsigned long flags; addr_t ip; int rc; diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index c3e5ad641b0b..8eafcd5fa004 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -3254,11 +3254,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track( /* 1x prefix + one read/write ccw per track */ cplength = 1 + trkcount; - /* on 31-bit we need space for two 32 bit addresses per page - * on 64-bit one 64 bit address - */ - datasize = sizeof(struct PFX_eckd_data) + - cidaw * sizeof(unsigned 
long long); + datasize = sizeof(struct PFX_eckd_data) + cidaw * sizeof(unsigned long); /* Allocate the ccw request. */ cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize, @@ -3856,7 +3852,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, } size = ALIGN(size, 8); - datasize = size + cidaw * sizeof(unsigned long long); + datasize = size + cidaw * sizeof(unsigned long); /* Allocate the ccw request. */ cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, diff --git a/drivers/s390/block/dasd_eckd.h b/drivers/s390/block/dasd_eckd.h index fb1f537d986a..34e153a6b19c 100644 --- a/drivers/s390/block/dasd_eckd.h +++ b/drivers/s390/block/dasd_eckd.h @@ -165,7 +165,7 @@ struct DE_eckd_data { __u8 ga_extended; /* Global Attributes Extended */ struct ch_t beg_ext; struct ch_t end_ext; - unsigned long long ep_sys_time; /* Ext Parameter - System Time Stamp */ + unsigned long ep_sys_time; /* Ext Parameter - System Time Stamp */ __u8 ep_format; /* Extended Parameter format byte */ __u8 ep_prio; /* Extended Parameter priority I/O byte */ __u8 ep_reserved1; /* Extended Parameter Reserved */ diff --git a/drivers/s390/block/dasd_erp.c b/drivers/s390/block/dasd_erp.c index 9e3419124264..6389feb2fb7a 100644 --- a/drivers/s390/block/dasd_erp.c +++ b/drivers/s390/block/dasd_erp.c @@ -124,7 +124,7 @@ dasd_default_erp_action(struct dasd_ccw_req *cqr) struct dasd_ccw_req *dasd_default_erp_postaction(struct dasd_ccw_req *cqr) { int success; - unsigned long long startclk, stopclk; + unsigned long startclk, stopclk; struct dasd_device *startdev; BUG_ON(cqr->refers == NULL || cqr->function == NULL); diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index 462cab5d4302..6168ccdb389c 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -174,6 +174,9 @@ dasd_fba_check_characteristics(struct dasd_device *device) if (readonly) set_bit(DASD_FLAG_DEVICE_RO, &device->flags); + /* FBA supports discard, set 
the according feature bit */ + dasd_set_feature(cdev, DASD_FEATURE_DISCARD, 1); + dev_info(&device->cdev->dev, "New FBA DASD %04X/%02X (CU %04X/%02X) with %d MB " "and %d B/blk%s\n", @@ -247,9 +250,192 @@ static void dasd_fba_check_for_device_change(struct dasd_device *device, dasd_generic_handle_state_change(device); }; -static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev, - struct dasd_block *block, - struct request *req) + +/* + * Builds a CCW with no data payload + */ +static void ccw_write_no_data(struct ccw1 *ccw) +{ + ccw->cmd_code = DASD_FBA_CCW_WRITE; + ccw->flags |= CCW_FLAG_SLI; + ccw->count = 0; +} + +/* + * Builds a CCW that writes only zeroes. + */ +static void ccw_write_zero(struct ccw1 *ccw, int count) +{ + ccw->cmd_code = DASD_FBA_CCW_WRITE; + ccw->flags |= CCW_FLAG_SLI; + ccw->count = count; + ccw->cda = (__u32) (addr_t) page_to_phys(ZERO_PAGE(0)); +} + +/* + * Helper function to count the amount of necessary CCWs within a given range + * with 4k alignment and command chaining in mind. + */ +static int count_ccws(sector_t first_rec, sector_t last_rec, + unsigned int blocks_per_page) +{ + sector_t wz_stop = 0, d_stop = 0; + int cur_pos = 0; + int count = 0; + + if (first_rec % blocks_per_page != 0) { + wz_stop = first_rec + blocks_per_page - + (first_rec % blocks_per_page) - 1; + if (wz_stop > last_rec) + wz_stop = last_rec; + cur_pos = wz_stop - first_rec + 1; + count++; + } + + if (last_rec - (first_rec + cur_pos) + 1 >= blocks_per_page) { + if ((last_rec - blocks_per_page + 1) % blocks_per_page != 0) + d_stop = last_rec - ((last_rec - blocks_per_page + 1) % + blocks_per_page); + else + d_stop = last_rec; + + cur_pos += d_stop - (first_rec + cur_pos) + 1; + count++; + } + + if (cur_pos == 0 || first_rec + cur_pos - 1 < last_rec) + count++; + + return count; +} + +/* + * This function builds a CCW request for block layer discard requests. 
+ * Each page in the z/VM hypervisor that represents certain records of an FBA + * device will be padded with zeros. This is a special behaviour of the WRITE + * command which is triggered when no data payload is added to the CCW. + * + * Note: Due to issues in some z/VM versions, we can't fully utilise this + * special behaviour. We have to keep a 4k (or 8 block) alignment in mind to + * work around those issues and write actual zeroes to the unaligned parts in + * the request. This workaround might be removed in the future. + */ +static struct dasd_ccw_req *dasd_fba_build_cp_discard( + struct dasd_device *memdev, + struct dasd_block *block, + struct request *req) +{ + struct LO_fba_data *LO_data; + struct dasd_ccw_req *cqr; + struct ccw1 *ccw; + + sector_t wz_stop = 0, d_stop = 0; + sector_t first_rec, last_rec; + + unsigned int blksize = block->bp_block; + unsigned int blocks_per_page; + int wz_count = 0; + int d_count = 0; + int cur_pos = 0; /* Current position within the extent */ + int count = 0; + int cplength; + int datasize; + int nr_ccws; + + first_rec = blk_rq_pos(req) >> block->s2b_shift; + last_rec = + (blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift; + count = last_rec - first_rec + 1; + + blocks_per_page = BLOCKS_PER_PAGE(blksize); + nr_ccws = count_ccws(first_rec, last_rec, blocks_per_page); + + /* define extent + nr_ccws * locate record + nr_ccws * single CCW */ + cplength = 1 + 2 * nr_ccws; + datasize = sizeof(struct DE_fba_data) + + nr_ccws * (sizeof(struct LO_fba_data) + sizeof(struct ccw1)); + + cqr = dasd_smalloc_request(DASD_FBA_MAGIC, cplength, datasize, memdev); + if (IS_ERR(cqr)) + return cqr; + + ccw = cqr->cpaddr; + + define_extent(ccw++, cqr->data, WRITE, blksize, first_rec, count); + LO_data = cqr->data + sizeof(struct DE_fba_data); + + /* First part is not aligned. Calculate range to write zeroes. 
*/ + if (first_rec % blocks_per_page != 0) { + wz_stop = first_rec + blocks_per_page - + (first_rec % blocks_per_page) - 1; + if (wz_stop > last_rec) + wz_stop = last_rec; + wz_count = wz_stop - first_rec + 1; + + ccw[-1].flags |= CCW_FLAG_CC; + locate_record(ccw++, LO_data++, WRITE, cur_pos, wz_count); + + ccw[-1].flags |= CCW_FLAG_CC; + ccw_write_zero(ccw++, wz_count * blksize); + + cur_pos = wz_count; + } + + /* We can do proper discard when we've got at least blocks_per_page blocks. */ + if (last_rec - (first_rec + cur_pos) + 1 >= blocks_per_page) { + /* is last record at page boundary? */ + if ((last_rec - blocks_per_page + 1) % blocks_per_page != 0) + d_stop = last_rec - ((last_rec - blocks_per_page + 1) % + blocks_per_page); + else + d_stop = last_rec; + + d_count = d_stop - (first_rec + cur_pos) + 1; + + ccw[-1].flags |= CCW_FLAG_CC; + locate_record(ccw++, LO_data++, WRITE, cur_pos, d_count); + + ccw[-1].flags |= CCW_FLAG_CC; + ccw_write_no_data(ccw++); + + cur_pos += d_count; + } + + /* We might still have some bits left which need to be zeroed. 
*/ + if (cur_pos == 0 || first_rec + cur_pos - 1 < last_rec) { + if (d_stop != 0) + wz_count = last_rec - d_stop; + else if (wz_stop != 0) + wz_count = last_rec - wz_stop; + else + wz_count = count; + + ccw[-1].flags |= CCW_FLAG_CC; + locate_record(ccw++, LO_data++, WRITE, cur_pos, wz_count); + + ccw[-1].flags |= CCW_FLAG_CC; + ccw_write_zero(ccw++, wz_count * blksize); + } + + if (blk_noretry_request(req) || + block->base->features & DASD_FEATURE_FAILFAST) + set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); + + cqr->startdev = memdev; + cqr->memdev = memdev; + cqr->block = block; + cqr->expires = memdev->default_expires * HZ; /* default 5 minutes */ + cqr->retries = memdev->default_retries; + cqr->buildclk = get_tod_clock(); + cqr->status = DASD_CQR_FILLED; + + return cqr; +} + +static struct dasd_ccw_req *dasd_fba_build_cp_regular( + struct dasd_device *memdev, + struct dasd_block *block, + struct request *req) { struct dasd_fba_private *private = block->base->private; unsigned long *idaws; @@ -372,6 +558,16 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev, return cqr; } +static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device *memdev, + struct dasd_block *block, + struct request *req) +{ + if (req_op(req) == REQ_OP_DISCARD || req_op(req) == REQ_OP_WRITE_ZEROES) + return dasd_fba_build_cp_discard(memdev, block, req); + else + return dasd_fba_build_cp_regular(memdev, block, req); +} + static int dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req) { diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index dca7cb1e6f65..f9e25fc03d6b 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -167,6 +167,9 @@ do { \ printk(d_loglevel PRINTK_HEADER " " d_string "\n", d_args); \ } while(0) +/* Macro to calculate number of blocks per page */ +#define BLOCKS_PER_PAGE(blksize) (PAGE_SIZE / blksize) + struct dasd_ccw_req { unsigned int magic; /* Eye catcher */ struct list_head 
devlist; /* for dasd_device request queue */ @@ -196,10 +199,10 @@ struct dasd_ccw_req { void *function; /* originating ERP action */ /* these are for statistics only */ - unsigned long long buildclk; /* TOD-clock of request generation */ - unsigned long long startclk; /* TOD-clock of request start */ - unsigned long long stopclk; /* TOD-clock of request interrupt */ - unsigned long long endclk; /* TOD-clock of request termination */ + unsigned long buildclk; /* TOD-clock of request generation */ + unsigned long startclk; /* TOD-clock of request start */ + unsigned long stopclk; /* TOD-clock of request interrupt */ + unsigned long endclk; /* TOD-clock of request termination */ /* Callback that is called after reaching final status. */ void (*callback)(struct dasd_ccw_req *, void *data); @@ -423,7 +426,7 @@ struct dasd_path { u8 chpid; struct dasd_conf_data *conf_data; atomic_t error_count; - unsigned long long errorclk; + unsigned long errorclk; }; @@ -454,6 +457,10 @@ struct dasd_profile_info { unsigned int dasd_read_time2[32]; /* hist. of time from start to irq */ unsigned int dasd_read_time3[32]; /* hist. of time from irq to end */ unsigned int dasd_read_nr_req[32]; /* hist. 
of # of requests in chanq */ + unsigned long dasd_sum_times; /* sum of request times */ + unsigned long dasd_sum_time_str; /* sum of time from build to start */ + unsigned long dasd_sum_time_irq; /* sum of time from start to irq */ + unsigned long dasd_sum_time_end; /* sum of time from irq to end */ }; struct dasd_profile { @@ -535,7 +542,7 @@ struct dasd_block { struct block_device *bdev; atomic_t open_count; - unsigned long long blocks; /* size of volume in blocks */ + unsigned long blocks; /* size of volume in blocks */ unsigned int bp_block; /* bytes per block */ unsigned int s2b_shift; /* log2 (bp_block/512) */ diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index 70dc2c4cd3f7..7104d6765773 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -90,7 +90,7 @@ dasd_devices_show(struct seq_file *m, void *v) seq_printf(m, "n/f "); else seq_printf(m, - "at blocksize: %d, %lld blocks, %lld MB", + "at blocksize: %u, %lu blocks, %lu MB", block->bp_block, block->blocks, ((block->bp_block >> 9) * block->blocks) >> 11); diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 0071febac9e6..2e7fd966c515 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -249,13 +249,13 @@ static void scm_request_requeue(struct scm_request *scmrq) static void scm_request_finish(struct scm_request *scmrq) { struct scm_blk_dev *bdev = scmrq->bdev; + int *error; int i; for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) { - if (scmrq->error) - blk_mq_end_request(scmrq->request[i], scmrq->error); - else - blk_mq_complete_request(scmrq->request[i]); + error = blk_mq_rq_to_pdu(scmrq->request[i]); + *error = scmrq->error; + blk_mq_complete_request(scmrq->request[i]); } atomic_dec(&bdev->queued_reqs); @@ -415,7 +415,9 @@ void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error) static void scm_blk_request_done(struct request *req) { - blk_mq_end_request(req, 0); 
+ int *error = blk_mq_rq_to_pdu(req); + + blk_mq_end_request(req, *error); } static const struct block_device_operations scm_blk_devops = { @@ -448,6 +450,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) atomic_set(&bdev->queued_reqs, 0); bdev->tag_set.ops = &scm_mq_ops; + bdev->tag_set.cmd_size = sizeof(int); bdev->tag_set.nr_hw_queues = nr_requests; bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests; bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig index b3f1c458905f..97c4c9fdd53d 100644 --- a/drivers/s390/char/Kconfig +++ b/drivers/s390/char/Kconfig @@ -169,10 +169,21 @@ config VMCP def_bool y prompt "Support for the z/VM CP interface" depends on S390 + select CMA help Select this option if you want to be able to interact with the control program on z/VM +config VMCP_CMA_SIZE + int "Memory in MiB reserved for z/VM CP interface" + default "4" + depends on VMCP + help + Specify the default amount of memory in MiB reserved for the z/VM CP + interface. If needed this memory is used for large contiguous memory + allocations. The default can be changed with the kernel command line + parameter "vmcp_cma". 
+ config MONREADER def_tristate m prompt "API for reading z/VM monitor service records" diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 710f2292911d..5d4f053d7c38 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -1082,7 +1082,7 @@ static struct attribute * raw3270_attrs[] = { NULL, }; -static struct attribute_group raw3270_attr_group = { +static const struct attribute_group raw3270_attr_group = { .attrs = raw3270_attrs, }; diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index b9c5522b8a68..dff8b94871f0 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -252,6 +252,7 @@ static int sclp_attach_storage(u8 id) if (!sccb) return -ENOMEM; sccb->header.length = PAGE_SIZE; + sccb->header.function_code = 0x40; rc = sclp_sync_request_timeout(0x00080001 | id << 8, sccb, SCLP_QUEUE_INTERVAL); if (rc) diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index 1406fb688a26..7003d52c2191 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -135,7 +135,7 @@ static ssize_t sysfs_ofb_data_write(struct file *filp, struct kobject *kobj, return rc ?: count; } -static struct bin_attribute ofb_bin_attr = { +static const struct bin_attribute ofb_bin_attr = { .attr = { .name = "event_data", .mode = S_IWUSR, diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index efd84d1d178b..bc1fc00910b0 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -39,7 +39,7 @@ struct read_info_sccb { u8 fac84; /* 84 */ u8 fac85; /* 85 */ u8 _pad_86[91 - 86]; /* 86-90 */ - u8 flags; /* 91 */ + u8 fac91; /* 91 */ u8 _pad_92[98 - 92]; /* 92-97 */ u8 fac98; /* 98 */ u8 hamaxpow; /* 99 */ @@ -103,6 +103,8 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb) sclp.has_kss = !!(sccb->fac98 & 0x01); if (sccb->fac85 & 0x02) S390_lowcore.machine_flags |= 
MACHINE_FLAG_ESOP; + if (sccb->fac91 & 0x40) + S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_GUEST; sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; sclp.rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; sclp.rzm <<= 20; @@ -139,7 +141,7 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb) /* Save IPL information */ sclp_ipl_info.is_valid = 1; - if (sccb->flags & 0x2) + if (sccb->fac91 & 0x2) sclp_ipl_info.has_dump = 1; memcpy(&sclp_ipl_info.loadparm, &sccb->loadparm, LOADPARM_LEN); diff --git a/drivers/s390/char/sclp_ocf.c b/drivers/s390/char/sclp_ocf.c index f59b71776bbd..f9cbb1ab047b 100644 --- a/drivers/s390/char/sclp_ocf.c +++ b/drivers/s390/char/sclp_ocf.c @@ -126,7 +126,7 @@ static struct attribute *ocf_attrs[] = { NULL, }; -static struct attribute_group ocf_attr_group = { +static const struct attribute_group ocf_attr_group = { .attrs = ocf_attrs, }; diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c index 3c379da2eef8..9dd4534823b3 100644 --- a/drivers/s390/char/tape_core.c +++ b/drivers/s390/char/tape_core.c @@ -175,7 +175,7 @@ static struct attribute *tape_attrs[] = { NULL }; -static struct attribute_group tape_attr_group = { +static const struct attribute_group tape_attr_group = { .attrs = tape_attrs, }; diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c index 98749fa817da..7898bbcc28fc 100644 --- a/drivers/s390/char/vmcp.c +++ b/drivers/s390/char/vmcp.c @@ -17,15 +17,85 @@ #include <linux/kernel.h> #include <linux/miscdevice.h> #include <linux/slab.h> +#include <linux/uaccess.h> #include <linux/export.h> +#include <linux/mutex.h> +#include <linux/cma.h> +#include <linux/mm.h> #include <asm/compat.h> #include <asm/cpcmd.h> #include <asm/debug.h> -#include <linux/uaccess.h> -#include "vmcp.h" +#include <asm/vmcp.h> + +struct vmcp_session { + char *response; + unsigned int bufsize; + unsigned int cma_alloc : 1; + int resp_size; + int resp_code; + struct mutex mutex; +}; static 
debug_info_t *vmcp_debug; +static unsigned long vmcp_cma_size __initdata = CONFIG_VMCP_CMA_SIZE * 1024 * 1024; +static struct cma *vmcp_cma; + +static int __init early_parse_vmcp_cma(char *p) +{ + vmcp_cma_size = ALIGN(memparse(p, NULL), PAGE_SIZE); + return 0; +} +early_param("vmcp_cma", early_parse_vmcp_cma); + +void __init vmcp_cma_reserve(void) +{ + if (!MACHINE_IS_VM) + return; + cma_declare_contiguous(0, vmcp_cma_size, 0, 0, 0, false, "vmcp", &vmcp_cma); +} + +static void vmcp_response_alloc(struct vmcp_session *session) +{ + struct page *page = NULL; + int nr_pages, order; + + order = get_order(session->bufsize); + nr_pages = ALIGN(session->bufsize, PAGE_SIZE) >> PAGE_SHIFT; + /* + * For anything below order 3 allocations rely on the buddy + * allocator. If such low-order allocations can't be handled + * anymore the system won't work anyway. + */ + if (order > 2) + page = cma_alloc(vmcp_cma, nr_pages, 0, GFP_KERNEL); + if (page) { + session->response = (char *)page_to_phys(page); + session->cma_alloc = 1; + return; + } + session->response = (char *)__get_free_pages(GFP_KERNEL | __GFP_RETRY_MAYFAIL, order); +} + +static void vmcp_response_free(struct vmcp_session *session) +{ + int nr_pages, order; + struct page *page; + + if (!session->response) + return; + order = get_order(session->bufsize); + nr_pages = ALIGN(session->bufsize, PAGE_SIZE) >> PAGE_SHIFT; + if (session->cma_alloc) { + page = phys_to_page((unsigned long)session->response); + cma_release(vmcp_cma, page, nr_pages); + session->cma_alloc = 0; + } else { + free_pages((unsigned long)session->response, order); + } + session->response = NULL; +} + static int vmcp_open(struct inode *inode, struct file *file) { struct vmcp_session *session; @@ -51,7 +121,7 @@ static int vmcp_release(struct inode *inode, struct file *file) session = file->private_data; file->private_data = NULL; - free_pages((unsigned long)session->response, get_order(session->bufsize)); + vmcp_response_free(session); kfree(session); 
return 0; } @@ -97,9 +167,7 @@ vmcp_write(struct file *file, const char __user *buff, size_t count, return -ERESTARTSYS; } if (!session->response) - session->response = (char *)__get_free_pages(GFP_KERNEL - | __GFP_RETRY_MAYFAIL | GFP_DMA, - get_order(session->bufsize)); + vmcp_response_alloc(session); if (!session->response) { mutex_unlock(&session->mutex); kfree(cmd); @@ -130,8 +198,8 @@ vmcp_write(struct file *file, const char __user *buff, size_t count, static long vmcp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct vmcp_session *session; + int ret = -ENOTTY; int __user *argp; - int temp; session = file->private_data; if (is_compat_task()) @@ -142,28 +210,26 @@ static long vmcp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return -ERESTARTSYS; switch (cmd) { case VMCP_GETCODE: - temp = session->resp_code; - mutex_unlock(&session->mutex); - return put_user(temp, argp); + ret = put_user(session->resp_code, argp); + break; case VMCP_SETBUF: - free_pages((unsigned long)session->response, - get_order(session->bufsize)); - session->response=NULL; - temp = get_user(session->bufsize, argp); - if (get_order(session->bufsize) > 8) { + vmcp_response_free(session); + ret = get_user(session->bufsize, argp); + if (ret) session->bufsize = PAGE_SIZE; - temp = -EINVAL; + if (!session->bufsize || get_order(session->bufsize) > 8) { + session->bufsize = PAGE_SIZE; + ret = -EINVAL; } - mutex_unlock(&session->mutex); - return temp; + break; case VMCP_GETSIZE: - temp = session->resp_size; - mutex_unlock(&session->mutex); - return put_user(temp, argp); + ret = put_user(session->resp_size, argp); + break; default: - mutex_unlock(&session->mutex); - return -ENOIOCTLCMD; + break; } + mutex_unlock(&session->mutex); + return ret; } static const struct file_operations vmcp_fops = { diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index 432fc40990bd..f4166f80c4d4 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -143,7 
+143,7 @@ static ssize_t chp_measurement_chars_read(struct file *filp, sizeof(chp->cmg_chars)); } -static struct bin_attribute chp_measurement_chars_attr = { +static const struct bin_attribute chp_measurement_chars_attr = { .attr = { .name = "measurement_chars", .mode = S_IRUSR, @@ -197,7 +197,7 @@ static ssize_t chp_measurement_read(struct file *filp, struct kobject *kobj, return count; } -static struct bin_attribute chp_measurement_attr = { +static const struct bin_attribute chp_measurement_attr = { .attr = { .name = "measurement", .mode = S_IRUSR, diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 7be01a58b44f..489b583f263d 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -612,7 +612,7 @@ static struct attribute *io_subchannel_attrs[] = { NULL, }; -static struct attribute_group io_subchannel_attr_group = { +static const struct attribute_group io_subchannel_attr_group = { .attrs = io_subchannel_attrs, }; @@ -626,7 +626,7 @@ static struct attribute * ccwdev_attrs[] = { NULL, }; -static struct attribute_group ccwdev_attr_group = { +static const struct attribute_group ccwdev_attr_group = { .attrs = ccwdev_attrs, }; diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c index 53436ea52230..f85dacf1c284 100644 --- a/drivers/s390/crypto/zcrypt_card.c +++ b/drivers/s390/crypto/zcrypt_card.c @@ -98,7 +98,7 @@ static struct attribute *zcrypt_card_attrs[] = { NULL, }; -static struct attribute_group zcrypt_card_attr_group = { +static const struct attribute_group zcrypt_card_attr_group = { .attrs = zcrypt_card_attrs, }; diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 4fddb4319481..afd20cee7ea0 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -140,7 +140,7 @@ struct function_and_rules_block { * + 0x000A 'MRP ' (MCL3 'PK' or CEX2C 'PK') * - VUD block */ -static struct CPRBX static_cprbx = { +static const 
struct CPRBX static_cprbx = { .cprb_len = 0x00DC, .cprb_ver_id = 0x02, .func_id = {0x54, 0x32}, diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c index a303f3b2c328..4742be0eec24 100644 --- a/drivers/s390/crypto/zcrypt_queue.c +++ b/drivers/s390/crypto/zcrypt_queue.c @@ -89,7 +89,7 @@ static struct attribute *zcrypt_queue_attrs[] = { NULL, }; -static struct attribute_group zcrypt_queue_attr_group = { +static const struct attribute_group zcrypt_queue_attr_group = { .attrs = zcrypt_queue_attrs, }; diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index f2f94f59e0fa..1a80ce41425e 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -350,7 +350,7 @@ static struct attribute *qeth_l3_device_attrs[] = { NULL, }; -static struct attribute_group qeth_l3_device_attr_group = { +static const struct attribute_group qeth_l3_device_attr_group = { .attrs = qeth_l3_device_attrs, }; @@ -680,7 +680,7 @@ static struct attribute *qeth_ipato_device_attrs[] = { NULL, }; -static struct attribute_group qeth_device_ipato_group = { +static const struct attribute_group qeth_device_ipato_group = { .name = "ipa_takeover", .attrs = qeth_ipato_device_attrs, }; @@ -843,7 +843,7 @@ static struct attribute *qeth_vipa_device_attrs[] = { NULL, }; -static struct attribute_group qeth_device_vipa_group = { +static const struct attribute_group qeth_device_vipa_group = { .name = "vipa", .attrs = qeth_vipa_device_attrs, }; @@ -1006,7 +1006,7 @@ static struct attribute *qeth_rxip_device_attrs[] = { NULL, }; -static struct attribute_group qeth_device_rxip_group = { +static const struct attribute_group qeth_device_rxip_group = { .name = "rxip", .attrs = qeth_rxip_device_attrs, }; |