diff options
Diffstat (limited to 'arch/s390/include/asm/pgtable.h')
-rw-r--r-- | arch/s390/include/asm/pgtable.h | 292 |
1 files changed, 77 insertions, 215 deletions
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index e08ec38f8c6e..fc642399b489 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -12,12 +12,9 @@ #define _ASM_S390_PGTABLE_H /* - * The Linux memory management assumes a three-level page table setup. For - * s390 31 bit we "fold" the mid level into the top-level page table, so - * that we physically have the same two-level page table as the s390 mmu - * expects in 31 bit mode. For s390 64 bit we use three of the five levels - * the hardware provides (region first and region second tables are not - * used). + * The Linux memory management assumes a three-level page table setup. + * For s390 64 bit we use up to four of the five levels the hardware + * provides (region first tables are not used). * * The "pgd_xxx()" functions are trivial for a folded two-level * setup: the pgd is never bad, and a pmd always exists (as it's folded @@ -66,15 +63,9 @@ extern unsigned long zero_page_mask; * table can map * PGDIR_SHIFT determines what a third-level page table entry can map */ -#ifndef CONFIG_64BIT -# define PMD_SHIFT 20 -# define PUD_SHIFT 20 -# define PGDIR_SHIFT 20 -#else /* CONFIG_64BIT */ -# define PMD_SHIFT 20 -# define PUD_SHIFT 31 -# define PGDIR_SHIFT 42 -#endif /* CONFIG_64BIT */ +#define PMD_SHIFT 20 +#define PUD_SHIFT 31 +#define PGDIR_SHIFT 42 #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) @@ -90,15 +81,8 @@ extern unsigned long zero_page_mask; * that leads to 1024 pte per pgd */ #define PTRS_PER_PTE 256 -#ifndef CONFIG_64BIT -#define __PAGETABLE_PUD_FOLDED -#define PTRS_PER_PMD 1 -#define __PAGETABLE_PMD_FOLDED -#define PTRS_PER_PUD 1 -#else /* CONFIG_64BIT */ #define PTRS_PER_PMD 2048 #define PTRS_PER_PUD 2048 -#endif /* CONFIG_64BIT */ #define PTRS_PER_PGD 2048 #define FIRST_USER_ADDRESS 0UL @@ -114,8 +98,8 @@ extern unsigned long zero_page_mask; #ifndef __ASSEMBLY__ /* - * The vmalloc and module area will always be on the topmost area of the kernel - * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc and modules. + * The vmalloc and module area will always be on the topmost area of the + * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules. * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where * modules will reside. That makes sure that inter module branches always * happen without trampolines and in addition the placement within a 2GB frame @@ -127,59 +111,23 @@ extern struct page *vmemmap; #define VMEM_MAX_PHYS ((unsigned long) vmemmap) -#ifdef CONFIG_64BIT extern unsigned long MODULES_VADDR; extern unsigned long MODULES_END; #define MODULES_VADDR MODULES_VADDR #define MODULES_END MODULES_END #define MODULES_LEN (1UL << 31) -#endif static inline int is_module_addr(void *addr) { -#ifdef CONFIG_64BIT BUILD_BUG_ON(MODULES_LEN > (1UL << 31)); if (addr < (void *)MODULES_VADDR) return 0; if (addr > (void *)MODULES_END) return 0; -#endif return 1; } /* - * A 31 bit pagetable entry of S390 has following format: - * | PFRA | | OS | - * 0 0IP0 - * 00000000001111111111222222222233 - * 01234567890123456789012345678901 - * - * I Page-Invalid Bit: Page is not available for address-translation - * P Page-Protection Bit: Store access not possible for page - * - * A 31 bit segmenttable entry of S390 has following format: - * | P-table origin | |PTL - * 0 IC - * 00000000001111111111222222222233 - * 01234567890123456789012345678901 - * - * I Segment-Invalid Bit: Segment is not available for address-translation - * C Common-Segment Bit: Segment is not private (PoP 3-30) - * PTL Page-Table-Length: Page-table length (PTL+1*16 entries -> up to 256) - * - * The 31 bit segmenttable origin of S390 has following format: - * - * |S-table origin | | STL | - * X **GPS - * 00000000001111111111222222222233 - * 01234567890123456789012345678901 - * - * X Space-Switch event: - * G Segment-Invalid Bit: * - * P Private-Space Bit: Segment is not private (PoP 3-30) - * S Storage-Alteration: - * STL Segment-Table-Length: Segment-table length (STL+1*16 entries -> up to 2048) - * * A 64 bit pagetable entry of S390 has following format: * | PFRA |0IPC| OS | * 0000000000111111111122222222223333333333444444444455555555556666 @@ -237,7 +185,6 @@ static inline int is_module_addr(void *addr) /* Software bits in the page table entry */ #define _PAGE_PRESENT 0x001 /* SW pte present bit */ -#define _PAGE_TYPE 0x002 /* SW pte type bit */ #define _PAGE_YOUNG 0x004 /* SW pte young bit */ #define _PAGE_DIRTY 0x008 /* SW pte dirty bit */ #define _PAGE_READ 0x010 /* SW pte read bit */ @@ -257,83 +204,36 @@ static inline int is_module_addr(void *addr) * table lock held. * * The following table gives the different possible bit combinations for - * the pte hardware and software bits in the last 12 bits of a pte: + * the pte hardware and software bits in the last 12 bits of a pte + * (. unassigned bit, x don't care, t swap type): * * 842100000000 * 000084210000 * 000000008421 - * .IR...wrdytp - * empty .10...000000 - * swap .10...xxxx10 - * file .11...xxxxx0 - * prot-none, clean, old .11...000001 - * prot-none, clean, young .11...000101 - * prot-none, dirty, old .10...001001 - * prot-none, dirty, young .10...001101 - * read-only, clean, old .11...010001 - * read-only, clean, young .01...010101 - * read-only, dirty, old .11...011001 - * read-only, dirty, young .01...011101 - * read-write, clean, old .11...110001 - * read-write, clean, young .01...110101 - * read-write, dirty, old .10...111001 - * read-write, dirty, young .00...111101 + * .IR.uswrdy.p + * empty .10.00000000 + * swap .11..ttttt.0 + * prot-none, clean, old .11.xx0000.1 + * prot-none, clean, young .11.xx0001.1 + * prot-none, dirty, old .10.xx0010.1 + * prot-none, dirty, young .10.xx0011.1 + * read-only, clean, old .11.xx0100.1 + * read-only, clean, young .01.xx0101.1 + * read-only, dirty, old .11.xx0110.1 + * read-only, dirty, young .01.xx0111.1 + * read-write, clean, old .11.xx1100.1 + * read-write, clean, young .01.xx1101.1 + * read-write, dirty, old .10.xx1110.1 + * read-write, dirty, young .00.xx1111.1 + * HW-bits: R read-only, I invalid + * SW-bits: p present, y young, d dirty, r read, w write, s special, + * u unused, l large * - * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001 - * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400 - * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402 + * pte_none is true for the bit pattern .10.00000000, pte == 0x400 + * pte_swap is true for the bit pattern .11..ooooo.0, (pte & 0x201) == 0x200 + * pte_present is true for the bit pattern .xx.xxxxxx.1, (pte & 0x001) == 0x001 */ -#ifndef CONFIG_64BIT - -/* Bits in the segment table address-space-control-element */ -#define _ASCE_SPACE_SWITCH 0x80000000UL /* space switch event */ -#define _ASCE_ORIGIN_MASK 0x7ffff000UL /* segment table origin */ -#define _ASCE_PRIVATE_SPACE 0x100 /* private space control */ -#define _ASCE_ALT_EVENT 0x80 /* storage alteration event control */ -#define _ASCE_TABLE_LENGTH 0x7f /* 128 x 64 entries = 8k */ - -/* Bits in the segment table entry */ -#define _SEGMENT_ENTRY_BITS 0x7fffffffUL /* Valid segment table bits */ -#define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */ -#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ -#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ -#define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */ -#define _SEGMENT_ENTRY_PTL 0x0f /* page table length */ - -#define _SEGMENT_ENTRY_DIRTY 0 /* No sw dirty bit for 31-bit */ -#define _SEGMENT_ENTRY_YOUNG 0 /* No sw young bit for 31-bit */ -#define _SEGMENT_ENTRY_READ 0 /* No sw read bit for 31-bit */ -#define _SEGMENT_ENTRY_WRITE 0 /* No sw write bit for 31-bit */ -#define _SEGMENT_ENTRY_LARGE 0 /* No large pages for 31-bit */ -#define _SEGMENT_ENTRY_BITS_LARGE 0 -#define _SEGMENT_ENTRY_ORIGIN_LARGE 0 - -#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL) -#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) - -/* - * Segment table entry encoding (I = invalid, R = read-only bit): - * ..R...I..... - * prot-none ..1...1..... - * read-only ..1...0..... - * read-write ..0...0..... - * empty ..0...1..... - */ - -/* Page status table bits for virtualization */ -#define PGSTE_ACC_BITS 0xf0000000UL -#define PGSTE_FP_BIT 0x08000000UL -#define PGSTE_PCL_BIT 0x00800000UL -#define PGSTE_HR_BIT 0x00400000UL -#define PGSTE_HC_BIT 0x00200000UL -#define PGSTE_GR_BIT 0x00040000UL -#define PGSTE_GC_BIT 0x00020000UL -#define PGSTE_UC_BIT 0x00008000UL /* user dirty (migration) */ -#define PGSTE_IN_BIT 0x00004000UL /* IPTE notify bit */ - -#else /* CONFIG_64BIT */ - /* Bits in the segment/region table address-space-control-element */ #define _ASCE_ORIGIN ~0xfffUL/* segment table origin */ #define _ASCE_PRIVATE_SPACE 0x100 /* private space control */ @@ -402,6 +302,8 @@ static inline int is_module_addr(void *addr) * read-write, dirty, young 11..0...0...11 * The segment table origin is used to distinguish empty (origin==0) from * read-write, old segment table entries (origin!=0) + * HW-bits: R read-only, I invalid + * SW-bits: y young, d dirty, r read, w write */ #define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */ @@ -417,8 +319,6 @@ static inline int is_module_addr(void *addr) #define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */ #define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */ -#endif /* CONFIG_64BIT */ - /* Guest Page State used for virtualization */ #define _PGSTE_GPS_ZERO 0x0000000080000000UL #define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL @@ -492,6 +392,15 @@ static inline int mm_has_pgste(struct mm_struct *mm) return 0; } +static inline int mm_alloc_pgste(struct mm_struct *mm) +{ +#ifdef CONFIG_PGSTE + if (unlikely(mm->context.alloc_pgste)) + return 1; +#endif + return 0; +} + /* * In the case that a guest uses storage keys * faults should no longer be backed by zero pages @@ -509,19 +418,6 @@ static inline int mm_use_skey(struct mm_struct *mm) /* * pgd/pmd/pte query functions */ -#ifndef CONFIG_64BIT - -static inline int pgd_present(pgd_t pgd) { return 1; } -static inline int pgd_none(pgd_t pgd) { return 0; } -static inline int pgd_bad(pgd_t pgd) { return 0; } - -static inline int pud_present(pud_t pud) { return 1; } -static inline int pud_none(pud_t pud) { return 0; } -static inline int pud_large(pud_t pud) { return 0; } -static inline int pud_bad(pud_t pud) { return 0; } - -#else /* CONFIG_64BIT */ - static inline int pgd_present(pgd_t pgd) { if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) @@ -583,8 +479,6 @@ static inline int pud_bad(pud_t pud) return (pud_val(pud) & mask) != 0; } -#endif /* CONFIG_64BIT */ - static inline int pmd_present(pmd_t pmd) { return pmd_val(pmd) != _SEGMENT_ENTRY_INVALID; @@ -666,10 +560,9 @@ static inline int pte_none(pte_t pte) static inline int pte_swap(pte_t pte) { - /* Bit pattern: (pte & 0x603) == 0x402 */ - return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | - _PAGE_TYPE | _PAGE_PRESENT)) - == (_PAGE_INVALID | _PAGE_TYPE); + /* Bit pattern: (pte & 0x201) == 0x200 */ + return (pte_val(pte) & (_PAGE_PROTECT | _PAGE_PRESENT)) + == _PAGE_PROTECT; } static inline int pte_special(pte_t pte) @@ -916,18 +809,14 @@ static inline int pte_unused(pte_t pte) static inline void pgd_clear(pgd_t *pgd) { -#ifdef CONFIG_64BIT if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; -#endif } static inline void pud_clear(pud_t *pud) { -#ifdef CONFIG_64BIT if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) pud_val(*pud) = _REGION3_ENTRY_EMPTY; -#endif } static inline void pmd_clear(pmd_t *pmdp) @@ -1026,10 +915,6 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep) { unsigned long pto = (unsigned long) ptep; -#ifndef CONFIG_64BIT - /* pto in ESA mode must point to the start of the segment table */ - pto &= 0x7ffffc00; -#endif /* Invalidation + global TLB flush for the pte */ asm volatile( " ipte %2,%3" @@ -1040,10 +925,6 @@ static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep) { unsigned long pto = (unsigned long) ptep; -#ifndef CONFIG_64BIT - /* pto in ESA mode must point to the start of the segment table */ - pto &= 0x7ffffc00; -#endif /* Invalidation + local TLB flush for the pte */ asm volatile( " .insn rrf,0xb2210000,%2,%3,0,1" @@ -1054,10 +935,6 @@ static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep) { unsigned long pto = (unsigned long) ptep; -#ifndef CONFIG_64BIT - /* pto in ESA mode must point to the start of the segment table */ - pto &= 0x7ffffc00; -#endif /* Invalidate a range of ptes + global TLB flush of the ptes */ do { asm volatile( @@ -1376,17 +1253,6 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) #define pgd_offset_k(address) pgd_offset(&init_mm, address) -#ifndef CONFIG_64BIT - -#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) -#define pud_deref(pmd) ({ BUG(); 0UL; }) -#define pgd_deref(pmd) ({ BUG(); 0UL; }) - -#define pud_offset(pgd, address) ((pud_t *) pgd) -#define pmd_offset(pud, address) ((pmd_t *) pud + pmd_index(address)) - -#else /* CONFIG_64BIT */ - #define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) #define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN) #define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) @@ -1407,8 +1273,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) return pmd + pmd_index(address); } -#endif /* CONFIG_64BIT */ - #define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot)) #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) #define pte_page(x) pfn_to_page(pte_pfn(x)) @@ -1699,53 +1563,51 @@ static inline int has_transparent_hugepage(void) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* - * 31 bit swap entry format: - * A page-table entry has some bits we have to treat in a special way. - * Bits 0, 20 and bit 23 have to be zero, otherwise an specification - * exception will occur instead of a page translation exception. The - * specifiation exception has the bad habit not to store necessary - * information in the lowcore. - * Bits 21, 22, 30 and 31 are used to indicate the page type. - * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 - * This leaves the bits 1-19 and bits 24-29 to store type and offset. - * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19 - * plus 24 for the offset. - * 0| offset |0110|o|type |00| - * 0 0000000001111111111 2222 2 22222 33 - * 0 1234567890123456789 0123 4 56789 01 - * * 64 bit swap entry format: * A page-table entry has some bits we have to treat in a special way. * Bits 52 and bit 55 have to be zero, otherwise an specification * exception will occur instead of a page translation exception. The * specifiation exception has the bad habit not to store necessary * information in the lowcore. - * Bits 53, 54, 62 and 63 are used to indicate the page type. - * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 - * This leaves the bits 0-51 and bits 56-61 to store type and offset. - * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51 - * plus 56 for the offset. - * | offset |0110|o|type |00| - * 0000000000111111111122222222223333333333444444444455 5555 5 55566 66 - * 0123456789012345678901234567890123456789012345678901 2345 6 78901 23 + * Bits 54 and 63 are used to indicate the page type. + * A swap pte is indicated by bit pattern (pte & 0x201) == 0x200 + * This leaves the bits 0-51 and bits 56-62 to store type and offset. + * We use the 5 bits from 57-61 for the type and the 52 bits from 0-51 + * for the offset. + * | offset |01100|type |00| + * |0000000000111111111122222222223333333333444444444455|55555|55566|66| + * |0123456789012345678901234567890123456789012345678901|23456|78901|23| */ -#ifndef CONFIG_64BIT -#define __SWP_OFFSET_MASK (~0UL >> 12) -#else -#define __SWP_OFFSET_MASK (~0UL >> 11) -#endif + +#define __SWP_OFFSET_MASK ((1UL << 52) - 1) +#define __SWP_OFFSET_SHIFT 12 +#define __SWP_TYPE_MASK ((1UL << 5) - 1) +#define __SWP_TYPE_SHIFT 2 + static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) { pte_t pte; - offset &= __SWP_OFFSET_MASK; - pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) | - ((offset & 1UL) << 7) | ((offset & ~1UL) << 11); + + pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT; + pte_val(pte) |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT; + pte_val(pte) |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT; return pte; } -#define __swp_type(entry) (((entry).val >> 2) & 0x1f) -#define __swp_offset(entry) (((entry).val >> 11) | (((entry).val >> 7) & 1)) -#define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) }) +static inline unsigned long __swp_type(swp_entry_t entry) +{ + return (entry.val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK; +} + +static inline unsigned long __swp_offset(swp_entry_t entry) +{ + return (entry.val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK; +} + +static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) +{ + return (swp_entry_t) { pte_val(mk_swap_pte(type, offset)) }; +} #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) |