From 13b5bd8af41ca56fd11cc0281f2a1201d8342233 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 13 Oct 2020 16:19:37 +0200 Subject: s390/head: set io/ext handlers to disabled wait Set io/ext handlers to disabled wait in the initial lowcore, so that they are effective right from the kernel start, when a boot method used does not rewrite this part of the lowcore for its own needs (i.e. kexec, z/vm ipl reader boot, qemu direct boot, load from removable media or server). When the kernel is loaded by zipl, scsi loader or qemu loader, some or all of the io/ext/pgm handlers addresses might be rewritten. Rewrite them to initial values again as early as possible. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/boot/head.S | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 1a2c2b1ed964..bc34ad2c2e49 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -62,8 +62,12 @@ __HEAD .org __LC_RST_NEW_PSW # 0x1a0 .quad 0,iplstart + .org __LC_EXT_NEW_PSW # 0x1b0 + .quad 0x0002000180000000,0x1b0 # disabled wait .org __LC_PGM_NEW_PSW # 0x1d0 .quad 0x0000000180000000,startup_pgm_check_handler + .org __LC_IO_NEW_PSW # 0x1f0 + .quad 0x0002000180000000,0x1f0 # disabled wait .org 0x200 @@ -303,6 +307,9 @@ ENTRY(startup_kdump) sam64 # switch to 64 bit addressing mode basr %r13,0 # get base .LPG0: + mvc __LC_EXT_NEW_PSW(16),.Lext_new_psw-.LPG0(%r13) + mvc __LC_PGM_NEW_PSW(16),.Lpgm_new_psw-.LPG0(%r13) + mvc __LC_IO_NEW_PSW(16),.Lio_new_psw-.LPG0(%r13) xc 0x200(256),0x200 # partially clear lowcore xc 0x300(256),0x300 xc 0xe00(256),0xe00 @@ -320,7 +327,12 @@ ENTRY(startup_kdump) .long 0x8000 + (1<<(PAGE_SHIFT+BOOT_STACK_ORDER)) - STACK_FRAME_OVERHEAD .align 8 6: .long 0x7fffffff,0xffffffff - +.Lext_new_psw: + .quad 0x0002000180000000,0x1b0 # disabled wait +.Lpgm_new_psw: + .quad 0x0000000180000000,startup_pgm_check_handler +.Lio_new_psw: + .quad 0x0002000180000000,0x1f0 # disabled wait .Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space .quad 0 # cr1: primary space segment table .quad .Lduct # cr2: dispatchable unit control table -- cgit v1.2.3-70-g09d2 From 85cde0192a983b227341be11af2c3625d39bc374 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 13 Oct 2020 22:35:27 +0200 Subject: s390/udelay: make it work for the early code Currently udelay relies on working EXT interrupts handler, which is not the case during early startup. In such cases udelay_simple() has to be used instead. To avoid mistakes of calling udelay too early, which could happen from the common code as well - make udelay work for the early code by introducing static branch and redirecting all udelay calls to udelay_simple until EXT interrupts handler is fully initialized and async stack is allocated. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/include/asm/delay.h | 1 + arch/s390/kernel/setup.c | 1 + arch/s390/lib/delay.c | 13 +++++++++++++ 3 files changed, 15 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/delay.h b/arch/s390/include/asm/delay.h index 898323fd93d2..4a08379cd1eb 100644 --- a/arch/s390/include/asm/delay.h +++ b/arch/s390/include/asm/delay.h @@ -13,6 +13,7 @@ #ifndef _S390_DELAY_H #define _S390_DELAY_H +void udelay_enable(void); void __ndelay(unsigned long long nsecs); void __udelay(unsigned long long usecs); void udelay_simple(unsigned long long usecs); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 4d843e64496f..2076415aee4b 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -336,6 +336,7 @@ int __init arch_early_irq_init(void) if (!stack) panic("Couldn't allocate async stack"); S390_lowcore.async_stack = stack + STACK_INIT_OFFSET; + udelay_enable(); return 0; } diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index daca7bad66de..1734a5c19834 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -13,11 +13,19 @@ #include #include #include +#include #include #include #include #include +static DEFINE_STATIC_KEY_FALSE(udelay_ready); + +void __init udelay_enable(void) +{ + static_branch_enable(&udelay_ready); +} + void __delay(unsigned long loops) { /* @@ -77,6 +85,11 @@ void __udelay(unsigned long long usecs) { unsigned long flags; + if (!static_branch_likely(&udelay_ready)) { + udelay_simple(usecs); + return; + } + preempt_disable(); local_irq_save(flags); if (in_irq()) { -- cgit v1.2.3-70-g09d2 From f38b0a743904cc785f5ba0d65dd1f60e17e80387 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 24 Sep 2020 17:12:08 +0200 Subject: s390: remove unused s390_base_ext_handler s390_base_ext_handler_fn haven't been used since its introduction in commit ab14de6c37fa ("[S390] Convert memory detection into C code."). s390_base_ext_handler itself is currently falsely storing 16 registers at __LC_SAVE_AREA_ASYNC rewriting several following lowcore values: cpu_flags, return_psw, return_mcck_psw, sync_enter_timer and async_enter_timer. Besides that s390_base_ext_handler itself is only potentially hiding EXT interrupts which should not have happen in the first place. Any piece of code which requires EXT interrupts before fully functional ext_int_handler is enabled has to do it on its own, like this is done by sclp_early_cmd() which is doing EXT interrupts handling synchronously in sclp_early_wait_irq(). With s390_base_ext_handler removed unexpected EXT interrupt leads to disabled wait with the address 0x1b0 (__LC_EXT_NEW_PSW), which is currently setup in the decompressor. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/include/asm/processor.h | 6 +----- arch/s390/kernel/base.S | 22 ---------------------- arch/s390/kernel/early.c | 4 +--- 3 files changed, 2 insertions(+), 30 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 962da04234af..2043c562ec55 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -318,14 +318,10 @@ static __always_inline void __noreturn disabled_wait(void) } /* - * Basic Machine Check/Program Check Handler. + * Basic Program Check Handler. */ - extern void s390_base_pgm_handler(void); -extern void s390_base_ext_handler(void); - extern void (*s390_base_pgm_handler_fn)(void); -extern void (*s390_base_ext_handler_fn)(void); #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index b79e0fd571f8..d255c69c1779 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -11,32 +11,10 @@ #include #include #include -#include GEN_BR_THUNK %r9 GEN_BR_THUNK %r14 -ENTRY(s390_base_ext_handler) - stmg %r0,%r15,__LC_SAVE_AREA_ASYNC - basr %r13,0 -0: aghi %r15,-STACK_FRAME_OVERHEAD - larl %r1,s390_base_ext_handler_fn - lg %r9,0(%r1) - ltgr %r9,%r9 - jz 1f - BASR_EX %r14,%r9 -1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC - ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit - lpswe __LC_EXT_OLD_PSW -ENDPROC(s390_base_ext_handler) - - .section .bss - .align 8 - .globl s390_base_ext_handler_fn -s390_base_ext_handler_fn: - .quad 0 - .previous - ENTRY(s390_base_pgm_handler) stmg %r0,%r15,__LC_SAVE_AREA_SYNC basr %r13,0 diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 705844f73934..cc89763a4d3c 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -169,12 +169,10 @@ static noinline __init void setup_lowcore_early(void) { psw_t psw; + psw.addr = (unsigned long)s390_base_pgm_handler; psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; if (IS_ENABLED(CONFIG_KASAN)) psw.mask |= PSW_MASK_DAT; - psw.addr = (unsigned long) s390_base_ext_handler; - S390_lowcore.external_new_psw = psw; - psw.addr = (unsigned long) s390_base_pgm_handler; S390_lowcore.program_new_psw = psw; s390_base_pgm_handler_fn = early_pgm_check_handler; S390_lowcore.preempt_count = INIT_PREEMPT_COUNT; -- cgit v1.2.3-70-g09d2 From a3453d923ece6760689894bad5b6d5e00c0ffe2d Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 15 Oct 2020 10:01:42 +0200 Subject: s390/kasan: remove 3-level paging support Compiling the kernel with Kasan disables automatic 3-level vs 4-level kernel space paging selection, because the shadow memory offset has to be known at compile time and there is no such offset which would be acceptable for both 3 and 4-level paging. Instead S390_4_LEVEL_PAGING option was introduced which allowed to pick how many paging levels to use under Kasan. With the introduction of protected virtualization, kernel memory layout may be affected due to ultravisor secure storage limit. This adds additional complexity into how memory layout would look like in combination with Kasan predefined shadow memory offsets. To simplify this make Kasan 4-level paging default and remove Kasan 3-level paging support. Suggested-by: Heiko Carstens Reviewed-by: Alexander Gordeev Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 3 +-- arch/s390/include/asm/kasan.h | 5 ----- arch/s390/mm/kasan_init.c | 52 +++++++++---------------------------------- lib/Kconfig.kasan | 9 -------- 4 files changed, 11 insertions(+), 58 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4a2a12be04c9..28bdb11e9db2 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -53,8 +53,7 @@ config ARCH_SUPPORTS_UPROBES config KASAN_SHADOW_OFFSET hex depends on KASAN - default 0x18000000000000 if KASAN_S390_4_LEVEL_PAGING - default 0x30000000000 + default 0x18000000000000 config S390 def_bool y diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h index e9bf486de136..528dfb9be77a 100644 --- a/arch/s390/include/asm/kasan.h +++ b/arch/s390/include/asm/kasan.h @@ -5,13 +5,8 @@ #ifdef CONFIG_KASAN #define KASAN_SHADOW_SCALE_SHIFT 3 -#ifdef CONFIG_KASAN_S390_4_LEVEL_PAGING #define KASAN_SHADOW_SIZE \ (_AC(1, UL) << (_REGION1_SHIFT - KASAN_SHADOW_SCALE_SHIFT)) -#else -#define KASAN_SHADOW_SIZE \ - (_AC(1, UL) << (_REGION2_SHIFT - KASAN_SHADOW_SCALE_SHIFT)) -#endif #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) #define KASAN_SHADOW_START KASAN_SHADOW_OFFSET #define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 5646b39c728a..fcb2b533b235 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -123,8 +123,7 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, pgd_populate(&init_mm, pg_dir, p4_dir); } - if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) && - mode == POPULATE_SHALLOW) { + if (mode == POPULATE_SHALLOW) { address = (address + P4D_SIZE) & P4D_MASK; continue; } @@ -143,12 +142,6 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, p4d_populate(&init_mm, p4_dir, pu_dir); } - if (!IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) && - mode == POPULATE_SHALLOW) { - address = (address + PUD_SIZE) & PUD_MASK; - continue; - } - pu_dir = pud_offset(p4_dir, address); if (pud_none(*pu_dir)) { if (mode == POPULATE_ZERO_SHADOW && @@ -281,7 +274,6 @@ void __init kasan_early_init(void) unsigned long shadow_alloc_size; unsigned long vmax_unlimited; unsigned long initrd_end; - unsigned long asce_type; unsigned long memsize; unsigned long pgt_prot = pgprot_val(PAGE_KERNEL_RO); pte_t pte_z; @@ -304,25 +296,12 @@ void __init kasan_early_init(void) memsize = min(memsize, OLDMEM_SIZE); memsize = min(memsize, KASAN_SHADOW_START); - if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING)) { - /* 4 level paging */ - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE)); - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE)); - crst_table_init((unsigned long *)early_pg_dir, - _REGION2_ENTRY_EMPTY); - untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION1_SIZE; - if (has_uv_sec_stor_limit()) - kasan_vmax = min(vmax_unlimited, uv_info.max_sec_stor_addr); - asce_type = _ASCE_TYPE_REGION2; - } else { - /* 3 level paging */ - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PUD_SIZE)); - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE)); - crst_table_init((unsigned long *)early_pg_dir, - _REGION3_ENTRY_EMPTY); - untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION2_SIZE; - asce_type = _ASCE_TYPE_REGION3; - } + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE)); + crst_table_init((unsigned long *)early_pg_dir, _REGION2_ENTRY_EMPTY); + untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION1_SIZE; + if (has_uv_sec_stor_limit()) + kasan_vmax = min(vmax_unlimited, uv_info.max_sec_stor_addr); /* init kasan zero shadow */ crst_table_init((unsigned long *)kasan_early_shadow_p4d, @@ -408,7 +387,7 @@ void __init kasan_early_init(void) pgalloc_freeable = pgalloc_pos; /* populate identity mapping */ kasan_early_vmemmap_populate(0, memsize, POPULATE_ONE2ONE); - kasan_set_pgd(early_pg_dir, asce_type); + kasan_set_pgd(early_pg_dir, _ASCE_TYPE_REGION2); kasan_enable_dat(); /* enable kasan */ init_task.kasan_depth = 0; @@ -428,24 +407,13 @@ void __init kasan_copy_shadow(pgd_t *pg_dir) pgd_t *pg_dir_dst; p4d_t *p4_dir_src; p4d_t *p4_dir_dst; - pud_t *pu_dir_src; - pud_t *pu_dir_dst; pg_dir_src = pgd_offset_raw(early_pg_dir, KASAN_SHADOW_START); pg_dir_dst = pgd_offset_raw(pg_dir, KASAN_SHADOW_START); p4_dir_src = p4d_offset(pg_dir_src, KASAN_SHADOW_START); p4_dir_dst = p4d_offset(pg_dir_dst, KASAN_SHADOW_START); - if (!p4d_folded(*p4_dir_src)) { - /* 4 level paging */ - memcpy(p4_dir_dst, p4_dir_src, - (KASAN_SHADOW_SIZE >> P4D_SHIFT) * sizeof(p4d_t)); - return; - } - /* 3 level paging */ - pu_dir_src = pud_offset(p4_dir_src, KASAN_SHADOW_START); - pu_dir_dst = pud_offset(p4_dir_dst, KASAN_SHADOW_START); - memcpy(pu_dir_dst, pu_dir_src, - (KASAN_SHADOW_SIZE >> PUD_SHIFT) * sizeof(pud_t)); + memcpy(p4_dir_dst, p4_dir_src, + (KASAN_SHADOW_SIZE >> P4D_SHIFT) * sizeof(p4d_t)); } void __init kasan_free_early_identity(void) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 542a9c18398e..8fb097057fec 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -136,15 +136,6 @@ config KASAN_STACK default 1 if KASAN_STACK_ENABLE || CC_IS_GCC default 0 -config KASAN_S390_4_LEVEL_PAGING - bool "KASan: use 4-level paging" - depends on S390 - help - Compiling the kernel with KASan disables automatic 3-level vs - 4-level paging selection. 3-level paging is used by default (up - to 3TB of RAM with KASan enabled). This options allows to force - 4-level paging instead. - config KASAN_SW_TAGS_IDENTIFY bool "Enable memory corruption identification" depends on KASAN_SW_TAGS -- cgit v1.2.3-70-g09d2 From 97b142b7400bdce93aa674df044a4bc58e88f08c Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 15 Oct 2020 10:20:08 +0200 Subject: s390: make sure vmemmap is top region table entry aligned Since commit 29d37e5b82f3 ("s390/protvirt: add ultravisor initialization") vmax is adjusted to the ultravisor secure storage limit. This limit is currently applied when 4-level paging is used. Later vmax is also used to align vmemmap address to the top region table entry border. When vmax is set to the ultravisor secure storage limit this is no longer the case. Instead of changing vmax, make only MODULES_END be affected by the secure storage limit, so that vmax stays intact for further vmemmap address alignment. Reviewed-by: Alexander Gordeev Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/kernel/setup.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 2076415aee4b..02bccee5ee85 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -568,13 +568,14 @@ static void __init setup_memory_end(void) vmax = _REGION2_SIZE; /* 3-level kernel page table */ else vmax = _REGION1_SIZE; /* 4-level kernel page table */ + /* module area is at the end of the kernel address space. */ + MODULES_END = vmax; if (is_prot_virt_host()) - adjust_to_uv_max(&vmax); + adjust_to_uv_max(&MODULES_END); #ifdef CONFIG_KASAN - vmax = kasan_vmax; + vmax = _REGION1_SIZE; + MODULES_END = kasan_vmax; #endif - /* module area is at the end of the kernel address space. */ - MODULES_END = vmax; MODULES_VADDR = MODULES_END - MODULES_LEN; VMALLOC_END = MODULES_VADDR; VMALLOC_START = VMALLOC_END - vmalloc_size; -- cgit v1.2.3-70-g09d2 From fc67c880e32a85786ad32129faa880f57bb6de41 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 18 Sep 2020 12:25:36 +0200 Subject: s390/mm: extend default vmalloc area size to 512GB We've seen several occurences in the past where the default vmalloc size of 128GB is not sufficient. Therefore extend the default size. Reviewed-by: Claudio Imbrenda Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/include/asm/pgtable.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b5dbae78969b..a8edd96b2103 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -79,15 +79,15 @@ extern unsigned long zero_page_mask; /* * The vmalloc and module area will always be on the topmost area of the - * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules. - * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where - * modules will reside. That makes sure that inter module branches always - * happen without trampolines and in addition the placement within a 2GB frame - * is branch prediction unit friendly. + * kernel mapping. 512GB are reserved for vmalloc by default. + * At the top of the vmalloc area a 2GB area is reserved where modules + * will reside. That makes sure that inter module branches always + * happen without trampolines and in addition the placement within a + * 2GB frame is branch prediction unit friendly. */ extern unsigned long VMALLOC_START; extern unsigned long VMALLOC_END; -#define VMALLOC_DEFAULT_SIZE ((128UL << 30) - MODULES_LEN) +#define VMALLOC_DEFAULT_SIZE ((512UL << 30) - MODULES_LEN) extern struct page *vmemmap; extern unsigned long vmemmap_size; -- cgit v1.2.3-70-g09d2 From 90178c1900798633dd0c83ab693254b8705177c5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 18 Sep 2020 12:25:37 +0200 Subject: s390/mm: let vmalloc area size depend on physical memory size To make sure that the vmalloc area size is for almost all cases large enough let it depend on the (potential) physical memory size. There is still the possibility to override this with the vmalloc kernel command line parameter. Reviewed-by: Christian Borntraeger Reviewed-by: Claudio Imbrenda Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/boot/boot.h | 1 + arch/s390/boot/ipl_parm.c | 5 ++++- arch/s390/boot/startup.c | 16 ++++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 2ea603f70c3b..4d4536299789 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -14,6 +14,7 @@ void print_pgm_check_info(void); unsigned long get_random_base(unsigned long safe_addr); extern int kaslr_enabled; +extern int vmalloc_size_set; extern const char kernel_version[]; unsigned long read_ipl_report(unsigned long safe_offset); diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index f94b91d72620..33f43a7d03f3 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -21,6 +21,7 @@ unsigned long __bootdata(memory_end); int __bootdata(memory_end_set); int __bootdata(noexec_disabled); +int vmalloc_size_set; int kaslr_enabled; static inline int __diag308(unsigned long subcode, void *addr) @@ -242,8 +243,10 @@ void parse_boot_command_line(void) memory_end_set = 1; } - if (!strcmp(param, "vmalloc") && val) + if (!strcmp(param, "vmalloc") && val) { vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE); + vmalloc_size_set = 1; + } if (!strcmp(param, "dfltcc") && val) { if (!strcmp(val, "off")) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index cc96b04cc0ba..95d13a252ed9 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -126,6 +126,21 @@ static void clear_bss_section(void) memset((void *)vmlinux.default_lma + vmlinux.image_size, 0, vmlinux.bss_size); } +/* + * Set vmalloc area size to an 8th of (potential) physical memory + * size, unless size has been set by kernel command line parameter. + */ +static void setup_vmalloc_size(void) +{ + unsigned long size; + + if (vmalloc_size_set) + return; + size = (memory_end ?: max_physmem_end) >> 3; + size = round_up(size, _SEGMENT_SIZE); + vmalloc_size = max(size, vmalloc_size); +} + void startup_kernel(void) { unsigned long random_lma; @@ -142,6 +157,7 @@ void startup_kernel(void) parse_boot_command_line(); setup_memory_end(); detect_memory(); + setup_vmalloc_size(); random_lma = __kaslr_offset = 0; if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) { -- cgit v1.2.3-70-g09d2 From 39f2899b9872d02ec178568a9acfe90804d7fd70 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sat, 17 Oct 2020 15:55:10 +0200 Subject: s390/decompressor: fix build warning Fixes the following warning with CONFIG_KERNEL_UNCOMPRESSED=y arch/s390/boot/compressed/decompressor.h:6:46: warning: non-void function does not return a value [-Wreturn-type] static inline void *decompress_kernel(void) {} ^ Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/boot/compressed/decompressor.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h index c15eb7114d83..41f0ad97a4db 100644 --- a/arch/s390/boot/compressed/decompressor.h +++ b/arch/s390/boot/compressed/decompressor.h @@ -2,8 +2,10 @@ #ifndef BOOT_COMPRESSED_DECOMPRESSOR_H #define BOOT_COMPRESSED_DECOMPRESSOR_H +#include + #ifdef CONFIG_KERNEL_UNCOMPRESSED -static inline void *decompress_kernel(void) {} +static inline void *decompress_kernel(void) { return NULL; } #else void *decompress_kernel(void); #endif -- cgit v1.2.3-70-g09d2 From 92bca2fe61f5dbb23d9466d70b8fa3f2ad263ba8 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 5 Oct 2020 09:13:15 +0200 Subject: s390/kasan: avoid confusing naming Kasan has nothing to do with vmemmap, strip vmemmap from function names to avoid confusing people. Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/mm/kasan_init.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index fcb2b533b235..a43381f030d4 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -87,7 +87,7 @@ enum populate_mode { POPULATE_ZERO_SHADOW, POPULATE_SHALLOW }; -static void __init kasan_early_vmemmap_populate(unsigned long address, +static void __init kasan_early_pgtable_populate(unsigned long address, unsigned long end, enum populate_mode mode) { @@ -367,26 +367,24 @@ void __init kasan_early_init(void) * +-----------------+ +- shadow end ---+ */ /* populate kasan shadow (for identity mapping and zero page mapping) */ - kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP); + kasan_early_pgtable_populate(__sha(0), __sha(memsize), POPULATE_MAP); if (IS_ENABLED(CONFIG_MODULES)) untracked_mem_end = kasan_vmax - MODULES_LEN; if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { untracked_mem_end = kasan_vmax - vmalloc_size - MODULES_LEN; /* shallowly populate kasan shadow for vmalloc and modules */ - kasan_early_vmemmap_populate(__sha(untracked_mem_end), - __sha(kasan_vmax), POPULATE_SHALLOW); + kasan_early_pgtable_populate(__sha(untracked_mem_end), __sha(kasan_vmax), + POPULATE_SHALLOW); } /* populate kasan shadow for untracked memory */ - kasan_early_vmemmap_populate(__sha(max_physmem_end), - __sha(untracked_mem_end), + kasan_early_pgtable_populate(__sha(max_physmem_end), __sha(untracked_mem_end), POPULATE_ZERO_SHADOW); - kasan_early_vmemmap_populate(__sha(kasan_vmax), - __sha(vmax_unlimited), + kasan_early_pgtable_populate(__sha(kasan_vmax), __sha(vmax_unlimited), POPULATE_ZERO_SHADOW); /* memory allocated for identity mapping structs will be freed later */ pgalloc_freeable = pgalloc_pos; /* populate identity mapping */ - kasan_early_vmemmap_populate(0, memsize, POPULATE_ONE2ONE); + kasan_early_pgtable_populate(0, memsize, POPULATE_ONE2ONE); kasan_set_pgd(early_pg_dir, _ASCE_TYPE_REGION2); kasan_enable_dat(); /* enable kasan */ -- cgit v1.2.3-70-g09d2 From 54b52981bb39ec3f33dc3677583b2ea30772b292 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 5 Oct 2020 10:28:48 +0200 Subject: s390/kasan: remove obvious parameter with the only possible value Kasan early code is only working on init_mm, remove unneeded pgd parameter from kasan_copy_shadow and rename it to kasan_copy_shadow_mapping. Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/include/asm/kasan.h | 4 ++-- arch/s390/mm/init.c | 2 +- arch/s390/mm/kasan_init.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h index 528dfb9be77a..0313f055f8c9 100644 --- a/arch/s390/include/asm/kasan.h +++ b/arch/s390/include/asm/kasan.h @@ -12,12 +12,12 @@ #define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) extern void kasan_early_init(void); -extern void kasan_copy_shadow(pgd_t *dst); +extern void kasan_copy_shadow_mapping(void); extern void kasan_free_early_identity(void); extern unsigned long kasan_vmax; #else static inline void kasan_early_init(void) { } -static inline void kasan_copy_shadow(pgd_t *dst) { } +static inline void kasan_copy_shadow_mapping(void) { } static inline void kasan_free_early_identity(void) { } #endif diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 77767850d0d0..69e6e2a5072e 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -105,7 +105,7 @@ void __init paging_init(void) S390_lowcore.user_asce = S390_lowcore.kernel_asce; crst_table_init((unsigned long *) init_mm.pgd, pgd_type); vmem_map_init(); - kasan_copy_shadow(init_mm.pgd); + kasan_copy_shadow_mapping(); /* enable virtual mapping in kernel mode */ __ctl_load(S390_lowcore.kernel_asce, 1, 1); diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index a43381f030d4..a9f561491110 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -393,7 +393,7 @@ void __init kasan_early_init(void) sclp_early_printk("KernelAddressSanitizer initialized\n"); } -void __init kasan_copy_shadow(pgd_t *pg_dir) +void __init kasan_copy_shadow_mapping(void) { /* * At this point we are still running on early pages setup early_pg_dir, @@ -407,7 +407,7 @@ void __init kasan_copy_shadow(pgd_t *pg_dir) p4d_t *p4_dir_dst; pg_dir_src = pgd_offset_raw(early_pg_dir, KASAN_SHADOW_START); - pg_dir_dst = pgd_offset_raw(pg_dir, KASAN_SHADOW_START); + pg_dir_dst = pgd_offset_raw(init_mm.pgd, KASAN_SHADOW_START); p4_dir_src = p4d_offset(pg_dir_src, KASAN_SHADOW_START); p4_dir_dst = p4d_offset(pg_dir_dst, KASAN_SHADOW_START); memcpy(p4_dir_dst, p4_dir_src, -- cgit v1.2.3-70-g09d2 From e385b550faf356db47234083c53e9841e8ec05c5 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 5 Oct 2020 09:07:43 +0200 Subject: s390/kasan: make kasan header self-contained It is relying on _REGION1_SHIFT / _REGION2_SHIFT values which come from asm/pgtable.h, so include it. Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/include/asm/kasan.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h index 0313f055f8c9..eea62586e719 100644 --- a/arch/s390/include/asm/kasan.h +++ b/arch/s390/include/asm/kasan.h @@ -2,6 +2,8 @@ #ifndef __ASM_KASAN_H #define __ASM_KASAN_H +#include + #ifdef CONFIG_KASAN #define KASAN_SHADOW_SCALE_SHIFT 3 -- cgit v1.2.3-70-g09d2 From 0c4ec024a481774df98910c79a4b3a105d1bb996 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 5 Oct 2020 13:53:36 +0200 Subject: s390/kasan: move memory needs estimation into a function Also correct rounding downs in estimation calculations. Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/boot/kaslr.c | 30 ++++++++---------------------- arch/s390/include/asm/kasan.h | 26 ++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 22 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index d844a5ef9089..987a9eaf228a 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "compressed/decompressor.h" #include "boot.h" @@ -179,34 +180,19 @@ unsigned long get_random_base(unsigned long safe_addr) if (memory_end_set) memory_limit = min(memory_limit, memory_end); + /* + * Avoid putting kernel in the end of physical memory + * which kasan will use for shadow memory and early pgtable + * mapping allocations. + */ + memory_limit -= kasan_estimate_memory_needs(memory_limit); + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE) { if (safe_addr < INITRD_START + INITRD_SIZE) safe_addr = INITRD_START + INITRD_SIZE; } safe_addr = ALIGN(safe_addr, THREAD_SIZE); - if ((IS_ENABLED(CONFIG_KASAN))) { - /* - * Estimate kasan memory requirements, which it will reserve - * at the very end of available physical memory. To estimate - * that, we take into account that kasan would require - * 1/8 of available physical memory (for shadow memory) + - * creating page tables for the whole memory + shadow memory - * region (1 + 1/8). To keep page tables estimates simple take - * the double of combined ptes size. - */ - memory_limit = get_mem_detect_end(); - if (memory_end_set && memory_limit > memory_end) - memory_limit = memory_end; - - /* for shadow memory */ - kasan_needs = memory_limit / 8; - /* for paging structures */ - kasan_needs += (memory_limit + kasan_needs) / PAGE_SIZE / - _PAGE_ENTRIES * _PAGE_TABLE_SIZE * 2; - memory_limit -= kasan_needs; - } - kernel_size = vmlinux.image_size + vmlinux.bss_size; if (safe_addr + kernel_size > memory_limit) return 0; diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h index eea62586e719..76f351bd6645 100644 --- a/arch/s390/include/asm/kasan.h +++ b/arch/s390/include/asm/kasan.h @@ -17,10 +17,36 @@ extern void kasan_early_init(void); extern void kasan_copy_shadow_mapping(void); extern void kasan_free_early_identity(void); extern unsigned long kasan_vmax; + +/* + * Estimate kasan memory requirements, which it will reserve + * at the very end of available physical memory. To estimate + * that, we take into account that kasan would require + * 1/8 of available physical memory (for shadow memory) + + * creating page tables for the whole memory + shadow memory + * region (1 + 1/8). To keep page tables estimates simple take + * the double of combined ptes size. + * + * physmem parameter has to be already adjusted if not entire physical memory + * would be used (e.g. due to effect of "mem=" option). + */ +static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem) +{ + unsigned long kasan_needs; + unsigned long pages; + /* for shadow memory */ + kasan_needs = round_up(physmem / 8, PAGE_SIZE); + /* for paging structures */ + pages = DIV_ROUND_UP(physmem + kasan_needs, PAGE_SIZE); + kasan_needs += DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2; + + return kasan_needs; +} #else static inline void kasan_early_init(void) { } static inline void kasan_copy_shadow_mapping(void) { } static inline void kasan_free_early_identity(void) { } +static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem) { return 0; } #endif #endif -- cgit v1.2.3-70-g09d2 From d7e7fbba67a32a32c3c7fe1ee6fccef93a0a8cc5 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 9 Oct 2020 17:14:02 +0200 Subject: s390/early: rewrite program parameter setup in C And move it earlier in the decompressor. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/boot/startup.c | 10 ++++++++++ arch/s390/kernel/head64.S | 7 +------ 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 95d13a252ed9..eeda91fcf7cc 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +59,14 @@ void error(char *x) disabled_wait(); } +static void setup_lpp(void) +{ + S390_lowcore.current_pid = 0; + S390_lowcore.lpp = LPP_MAGIC; + if (test_facility(40)) + lpp(&S390_lowcore.lpp); +} + #ifdef CONFIG_KERNEL_UNCOMPRESSED unsigned long mem_safe_offset(void) { @@ -147,6 +156,7 @@ void startup_kernel(void) unsigned long safe_addr; void *img; + setup_lpp(); store_ipl_parmblock(); safe_addr = mem_safe_offset(); safe_addr = read_ipl_report(safe_addr); diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 8b88dbbda7df..0c253886da78 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -18,12 +18,7 @@ __HEAD ENTRY(startup_continue) - tm __LC_STFLE_FAC_LIST+5,0x80 # LPP available ? - jz 0f - xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid - mvi __LC_LPP,0x80 # and set LPP_MAGIC - .insn s,0xb2800000,__LC_LPP # load program parameter -0: larl %r1,tod_clock_base + larl %r1,tod_clock_base mvc 0(16,%r1),__LC_BOOT_CLOCK larl %r13,.LPG1 # get base # -- cgit v1.2.3-70-g09d2 From a67a88b0b8de16b4cd6ad50bfe0e03605904dc75 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 21 Oct 2020 10:56:47 +0200 Subject: s390/pci: remove races against pte updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Way back it was a reasonable assumptions that iomem mappings never change the pfn range they point at. But this has changed: - gpu drivers dynamically manage their memory nowadays, invalidating ptes with unmap_mapping_range when buffers get moved - contiguous dma allocations have moved from dedicated carvetouts to cma regions. This means if we miss the unmap the pfn might contain pagecache or anon memory (well anything allocated with GFP_MOVEABLE) - even /dev/mem now invalidates mappings when the kernel requests that iomem region when CONFIG_IO_STRICT_DEVMEM is set, see commit 3234ac664a87 ("/dev/mem: Revoke mappings when a driver claims the region") Accessing pfns obtained from ptes without holding all the locks is therefore no longer a good idea. Fix this. Since zpci_memcpy_from|toio seems to not do anything nefarious with locks we just need to open code get_pfn and follow_pfn and make sure we drop the locks only after we're done. The write function also needs the copy_from_user move, since we can't take userspace faults while holding the mmap sem. Reviewed-by: Gerald Schaefer Signed-off-by: Daniel Vetter Cc: Jason Gunthorpe Cc: Dan Williams Cc: Kees Cook Cc: Andrew Morton Cc: John Hubbard Cc: Jérôme Glisse Cc: Jan Kara Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: Gerald Schaefer Cc: linux-s390@vger.kernel.org Cc: Niklas Schnelle Signed-off-by: Daniel Vetter Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_mmio.c | 98 ++++++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 41 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 401cf670a243..1a6adbc68ee8 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -119,33 +119,15 @@ static inline int __memcpy_toio_inuser(void __iomem *dst, return rc; } -static long get_pfn(unsigned long user_addr, unsigned long access, - unsigned long *pfn) -{ - struct vm_area_struct *vma; - long ret; - - mmap_read_lock(current->mm); - ret = -EINVAL; - vma = find_vma(current->mm, user_addr); - if (!vma) - goto out; - ret = -EACCES; - if (!(vma->vm_flags & access)) - goto out; - ret = follow_pfn(vma, user_addr, pfn); -out: - mmap_read_unlock(current->mm); - return ret; -} - SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, const void __user *, user_buffer, size_t, length) { u8 local_buf[64]; void __iomem *io_addr; void *buf; - unsigned long pfn; + struct vm_area_struct *vma; + pte_t *ptep; + spinlock_t *ptl; long ret; if (!zpci_is_enabled()) @@ -158,7 +140,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, * We only support write access to MIO capable devices if we are on * a MIO enabled system. Otherwise we would have to check for every * address if it is a special ZPCI_ADDR and would have to do - * a get_pfn() which we don't need for MIO capable devices. Currently + * a pfn lookup which we don't need for MIO capable devices. Currently * ISM devices are the only devices without MIO support and there is no * known need for accessing these from userspace. */ @@ -176,21 +158,37 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, } else buf = local_buf; - ret = get_pfn(mmio_addr, VM_WRITE, &pfn); + ret = -EFAULT; + if (copy_from_user(buf, user_buffer, length)) + goto out_free; + + mmap_read_lock(current->mm); + ret = -EINVAL; + vma = find_vma(current->mm, mmio_addr); + if (!vma) + goto out_unlock_mmap; + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + goto out_unlock_mmap; + ret = -EACCES; + if (!(vma->vm_flags & VM_WRITE)) + goto out_unlock_mmap; + + ret = follow_pte_pmd(vma->vm_mm, mmio_addr, NULL, &ptep, NULL, &ptl); if (ret) - goto out; - io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | + goto out_unlock_mmap; + + io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); - ret = -EFAULT; if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) - goto out; - - if (copy_from_user(buf, user_buffer, length)) - goto out; + goto out_unlock_pt; ret = zpci_memcpy_toio(io_addr, buf, length); -out: +out_unlock_pt: + pte_unmap_unlock(ptep, ptl); +out_unlock_mmap: + mmap_read_unlock(current->mm); +out_free: if (buf != local_buf) kfree(buf); return ret; @@ -274,7 +272,9 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, u8 local_buf[64]; void __iomem *io_addr; void *buf; - unsigned long pfn; + struct vm_area_struct *vma; + pte_t *ptep; + spinlock_t *ptl; long ret; if (!zpci_is_enabled()) @@ -287,7 +287,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, * We only support read access to MIO capable devices if we are on * a MIO enabled system. Otherwise we would have to check for every * address if it is a special ZPCI_ADDR and would have to do - * a get_pfn() which we don't need for MIO capable devices. Currently + * a pfn lookup which we don't need for MIO capable devices. Currently * ISM devices are the only devices without MIO support and there is no * known need for accessing these from userspace. */ @@ -306,22 +306,38 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, buf = local_buf; } - ret = get_pfn(mmio_addr, VM_READ, &pfn); + mmap_read_lock(current->mm); + ret = -EINVAL; + vma = find_vma(current->mm, mmio_addr); + if (!vma) + goto out_unlock_mmap; + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + goto out_unlock_mmap; + ret = -EACCES; + if (!(vma->vm_flags & VM_WRITE)) + goto out_unlock_mmap; + + ret = follow_pte_pmd(vma->vm_mm, mmio_addr, NULL, &ptep, NULL, &ptl); if (ret) - goto out; - io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); + goto out_unlock_mmap; + + io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) | + (mmio_addr & ~PAGE_MASK)); if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) { ret = -EFAULT; - goto out; + goto out_unlock_pt; } ret = zpci_memcpy_fromio(buf, io_addr, length); - if (ret) - goto out; - if (copy_to_user(user_buffer, buf, length)) + +out_unlock_pt: + pte_unmap_unlock(ptep, ptl); +out_unlock_mmap: + mmap_read_unlock(current->mm); + + if (!ret && copy_to_user(user_buffer, buf, length)) ret = -EFAULT; -out: if (buf != local_buf) kfree(buf); return ret; -- cgit v1.2.3-70-g09d2 From ab177c5d00cda2655fd814356ea034aaf179cf05 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 10 Nov 2020 14:51:32 +0100 Subject: s390/mm: remove unused clear_user_asce() Signed-off-by: Heiko Carstens --- arch/s390/include/asm/mmu_context.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index c9f3d8a52756..cec19ae164eb 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -78,13 +78,6 @@ static inline void set_user_asce(struct mm_struct *mm) clear_cpu_flag(CIF_ASCE_PRIMARY); } -static inline void clear_user_asce(void) -{ - S390_lowcore.user_asce = S390_lowcore.kernel_asce; - __ctl_load(S390_lowcore.kernel_asce, 1, 1); - set_cpu_flag(CIF_ASCE_PRIMARY); -} - mm_segment_t enable_sacf_uaccess(void); void disable_sacf_uaccess(mm_segment_t old_fs); -- cgit v1.2.3-70-g09d2 From da78693e6e496ccd5cb6b0e9025007803e8f93c2 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 26 Oct 2020 10:01:24 +0100 Subject: s390/pci: inform when missing required facilities when we're missing the necessary machine facilities zPCI can not function. Until now it would silently fail to be initialized, add an informational print. Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 570016ae8bcd..41df8fcfddde 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -851,8 +851,10 @@ static int __init pci_base_init(void) if (!s390_pci_probe) return 0; - if (!test_facility(69) || !test_facility(71)) + if (!test_facility(69) || !test_facility(71)) { + pr_info("PCI is not supported because CPU facilities 69 or 71 are not available\n"); return 0; + } if (test_facility(153) && !s390_pci_no_mio) { static_branch_enable(&have_mio); -- cgit v1.2.3-70-g09d2 From b971cbd03ee0a24f7af47b681e8f911794c69780 Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Wed, 11 Nov 2020 01:03:02 -0600 Subject: s390/sclp: provide extended sccb support As the number of cpus increases, the sccb response can exceed 4k for read cpu and read scp info sclp commands. Hence, all cpu info entries cant be embedded within a sccb response Solution: To overcome this limitation, extended sccb facility is provided by sclp. 1. Check if the extended sccb facility is installed. 2. If extended sccb is installed, perform the read scp and read cpu command considering a max sccb length of three page size. This max length is based on factors like max cpus, sccb header. 3. If extended sccb is not installed, perform the read scp and read cpu sclp command considering a max sccb length of one page size. Signed-off-by: Sumanth Korikkar Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/include/asm/sclp.h | 7 ++++++- arch/s390/include/asm/setup.h | 2 -- drivers/s390/char/sclp.h | 8 ++++++-- drivers/s390/char/sclp_cmd.c | 10 +++++++--- drivers/s390/char/sclp_early.c | 3 ++- drivers/s390/char/sclp_early_core.c | 6 ++++-- 6 files changed, 25 insertions(+), 11 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index a7bdd128d85b..5763769a39b6 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -12,7 +12,12 @@ #include #define SCLP_CHP_INFO_MASK_SIZE 32 -#define SCLP_MAX_CORES 256 +#define EARLY_SCCB_SIZE PAGE_SIZE +#define SCLP_MAX_CORES 512 +/* 144 + 16 * SCLP_MAX_CORES + 2 * (SCLP_MAX_CORES - 1) */ +#define EXT_SCCB_READ_SCP (3 * PAGE_SIZE) +/* 24 + 16 * SCLP_MAX_CORES */ +#define EXT_SCCB_READ_CPU (3 * PAGE_SIZE) struct sclp_chp_info { u8 recognized[SCLP_CHP_INFO_MASK_SIZE]; diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index bdb242a1544e..1be6a064f317 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -16,8 +16,6 @@ #define EARLY_SCCB_OFFSET 0x11000 #define HEAD_END 0x12000 -#define EARLY_SCCB_SIZE PAGE_SIZE - /* * Machine features detected in early.c */ diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 448ed25bf6b8..6de919944a39 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -156,7 +156,11 @@ struct read_cpu_info_sccb { u16 offset_configured; u16 nr_standby; u16 offset_standby; - u8 reserved[4096 - 16]; + /* + * Without ext sccb, struct size is PAGE_SIZE. + * With ext sccb, struct size is EXT_SCCB_READ_CPU. + */ + u8 reserved[]; } __attribute__((packed, aligned(PAGE_SIZE))); struct read_info_sccb { @@ -199,7 +203,7 @@ struct read_info_sccb { u8 byte_134; /* 134 */ u8 cpudirq; /* 135 */ u16 cbl; /* 136-137 */ - u8 _pad_138[4096 - 138]; /* 138-4095 */ + u8 _pad_138[EXT_SCCB_READ_SCP - 138]; } __packed __aligned(PAGE_SIZE); struct read_storage_sccb { diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index f6e97f0830f6..7ebe89890a9b 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "sclp.h" @@ -87,14 +88,17 @@ out: int _sclp_get_core_info(struct sclp_core_info *info) { int rc; + int length = test_facility(140) ? EXT_SCCB_READ_CPU : PAGE_SIZE; struct read_cpu_info_sccb *sccb; if (!SCLP_HAS_CPU_INFO) return -EOPNOTSUPP; - sccb = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + + sccb = (void *)__get_free_pages(GFP_KERNEL | GFP_DMA | __GFP_ZERO, get_order(length)); if (!sccb) return -ENOMEM; - sccb->header.length = sizeof(*sccb); + sccb->header.length = length; + sccb->header.control_mask[2] = 0x80; rc = sclp_sync_request_timeout(SCLP_CMDW_READ_CPU_INFO, sccb, SCLP_QUEUE_INTERVAL); if (rc) @@ -107,7 +111,7 @@ int _sclp_get_core_info(struct sclp_core_info *info) } sclp_fill_core_info(info, sccb); out: - free_page((unsigned long) sccb); + free_pages((unsigned long) sccb, get_order(length)); return rc; } diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index cc78b538acbf..2f3515fa242a 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "sclp_sdias.h" #include "sclp.h" @@ -114,7 +115,7 @@ void __init sclp_early_get_ipl_info(struct sclp_ipl_info *info) int __init sclp_early_get_core_info(struct sclp_core_info *info) { struct read_cpu_info_sccb *sccb; - int length = PAGE_SIZE; + int length = test_facility(140) ? EXT_SCCB_READ_CPU : PAGE_SIZE; int rc = 0; if (!SCLP_HAS_CPU_INFO) diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c index d4fb61c10d7c..ec9f8ad5341c 100644 --- a/drivers/s390/char/sclp_early_core.c +++ b/drivers/s390/char/sclp_early_core.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "sclp.h" #include "sclp_rw.h" @@ -237,13 +238,14 @@ void sclp_early_printk(const char *str) int __init sclp_early_read_info(void) { int i; + int length = test_facility(140) ? EXT_SCCB_READ_SCP : PAGE_SIZE; struct read_info_sccb *sccb = &sclp_info_sccb; sclp_cmdw_t commands[] = {SCLP_CMDW_READ_SCP_INFO_FORCED, SCLP_CMDW_READ_SCP_INFO}; for (i = 0; i < ARRAY_SIZE(commands); i++) { - memset(sccb, 0, sizeof(*sccb)); - sccb->header.length = sizeof(*sccb); + memset(sccb, 0, length); + sccb->header.length = length; sccb->header.function_code = 0x80; sccb->header.control_mask[2] = 0x80; if (sclp_early_cmd(commands[i], sccb)) -- cgit v1.2.3-70-g09d2 From 0cd9b7230cc57b0f9cfd13ef5c3830c7db1a68d4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 11 Nov 2020 18:46:26 +0100 Subject: s390: add separate program check exit path System call and program check handler both use the system call exit path when returning to previous context. However the program check handler jumps right to the end of the system call exit path if the previous context is kernel context. This lead to the quite odd double disabling of interrupts in the system call exit path introduced with commit ce9dfafe29be ("s390: fix system call exit path"). To avoid that have a separate program check handler exit path if the previous context is kernel context. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 5346545b9860..0a7811d993a7 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -430,8 +430,6 @@ ENTRY(system_call) TSTMSK __LC_CPU_FLAGS,(_CIF_WORK-_CIF_FPU) jnz .Lsysc_work BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP -.Lsysc_restore: - DISABLE_INTS TSTMSK __LC_CPU_FLAGS, _CIF_FPU jz .Lsysc_skip_fpu brasl %r14,load_fpu_regs @@ -709,10 +707,20 @@ ENTRY(pgm_check_handler) .Lpgm_return: LOCKDEP_SYS_EXIT tm __PT_PSW+1(%r11),0x01 # returning to user ? - jno .Lsysc_restore + jno .Lpgm_restore TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL jo .Lsysc_do_syscall j .Lsysc_tif +.Lpgm_restore: + DISABLE_INTS + TSTMSK __LC_CPU_FLAGS, _CIF_FPU + jz .Lpgm_skip_fpu + brasl %r14,load_fpu_regs +.Lpgm_skip_fpu: + mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + lmg %r0,%r15,__PT_R0(%r11) + b __LC_RETURN_LPSWE # # PER event in supervisor state, must be kprobes -- cgit v1.2.3-70-g09d2 From 5ec11d0966406e4857a642539c5781fe72cc6e22 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 16 Nov 2020 11:18:00 +0100 Subject: s390/cio: fix kernel-doc markups in cio driver. Fix typo in the kernel-doc markups 1. ccw driver -> ccw_driver 2. ccw_device_id_is_equal() -> ccw_dev_id_is_equal Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Cornelia Huck [vneethv@linux.ibm.com: slight modification in the changelog] Reviewed-by: Vineeth Vijayan Signed-off-by: Heiko Carstens --- arch/s390/include/asm/ccwdev.h | 2 +- arch/s390/include/asm/cio.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index c0be5fe1ddba..069709b8e9e7 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -115,7 +115,7 @@ enum uc_todo { }; /** - * struct ccw driver - device driver for channel attached devices + * struct ccw_driver - device driver for channel attached devices * @ids: ids supported by this driver * @probe: function called on probe * @remove: function called on remove diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 5c58756d6476..23dceb8d0453 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -329,7 +329,7 @@ struct ccw_dev_id { }; /** - * ccw_device_id_is_equal() - compare two ccw_dev_ids + * ccw_dev_id_is_equal() - compare two ccw_dev_ids * @dev_id1: a ccw_dev_id * @dev_id2: another ccw_dev_id * Returns: -- cgit v1.2.3-70-g09d2 From 1e632eaa0f4b7f65a81301205ca122024991e1d3 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 5 Nov 2020 09:34:34 +0200 Subject: s390/prng: let misc_register() add the prng sysfs attributes Instead of creating the sysfs attributes for the prng devices by hand, describe them in .groups and let the misdevice core handle it. This also ensures that the attributes are available when the KOBJ_ADD event is raised. Signed-off-by: Julian Wiedmann Reviewed-by: Harald Freudenberger Signed-off-by: Heiko Carstens --- arch/s390/crypto/prng.c | 53 +++++++++++++++---------------------------------- 1 file changed, 16 insertions(+), 37 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 5057773f82e9..b2f219ec379c 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -674,20 +674,6 @@ static const struct file_operations prng_tdes_fops = { .llseek = noop_llseek, }; -static struct miscdevice prng_sha512_dev = { - .name = "prandom", - .minor = MISC_DYNAMIC_MINOR, - .mode = 0644, - .fops = &prng_sha512_fops, -}; -static struct miscdevice prng_tdes_dev = { - .name = "prandom", - .minor = MISC_DYNAMIC_MINOR, - .mode = 0644, - .fops = &prng_tdes_fops, -}; - - /* chunksize attribute (ro) */ static ssize_t prng_chunksize_show(struct device *dev, struct device_attribute *attr, @@ -801,18 +787,30 @@ static struct attribute *prng_sha512_dev_attrs[] = { &dev_attr_strength.attr, NULL }; +ATTRIBUTE_GROUPS(prng_sha512_dev); + static struct attribute *prng_tdes_dev_attrs[] = { &dev_attr_chunksize.attr, &dev_attr_byte_counter.attr, &dev_attr_mode.attr, NULL }; +ATTRIBUTE_GROUPS(prng_tdes_dev); -static struct attribute_group prng_sha512_dev_attr_group = { - .attrs = prng_sha512_dev_attrs +static struct miscdevice prng_sha512_dev = { + .name = "prandom", + .minor = MISC_DYNAMIC_MINOR, + .mode = 0644, + .fops = &prng_sha512_fops, + .groups = prng_sha512_dev_groups, }; -static struct attribute_group prng_tdes_dev_attr_group = { - .attrs = prng_tdes_dev_attrs + +static struct miscdevice prng_tdes_dev = { + .name = "prandom", + .minor = MISC_DYNAMIC_MINOR, + .mode = 0644, + .fops = &prng_tdes_fops, + .groups = prng_tdes_dev_groups, }; @@ -867,13 +865,6 @@ static int __init prng_init(void) prng_sha512_deinstantiate(); goto out; } - ret = sysfs_create_group(&prng_sha512_dev.this_device->kobj, - &prng_sha512_dev_attr_group); - if (ret) { - misc_deregister(&prng_sha512_dev); - prng_sha512_deinstantiate(); - goto out; - } } else { @@ -898,14 +889,6 @@ static int __init prng_init(void) prng_tdes_deinstantiate(); goto out; } - ret = sysfs_create_group(&prng_tdes_dev.this_device->kobj, - &prng_tdes_dev_attr_group); - if (ret) { - misc_deregister(&prng_tdes_dev); - prng_tdes_deinstantiate(); - goto out; - } - } out: @@ -916,13 +899,9 @@ out: static void __exit prng_exit(void) { if (prng_mode == PRNG_MODE_SHA512) { - sysfs_remove_group(&prng_sha512_dev.this_device->kobj, - &prng_sha512_dev_attr_group); misc_deregister(&prng_sha512_dev); prng_sha512_deinstantiate(); } else { - sysfs_remove_group(&prng_tdes_dev.this_device->kobj, - &prng_tdes_dev_attr_group); misc_deregister(&prng_tdes_dev); prng_tdes_deinstantiate(); } -- cgit v1.2.3-70-g09d2 From 73045a08cf5549cc7dee14463431fbeb2134dd67 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 19 Oct 2020 11:01:33 +0200 Subject: s390: unify identity mapping limits handling Currently we have to consider too many different values which in the end only affect identity mapping size. These are: 1. max_physmem_end - end of physical memory online or standby. Always <= end of the last online memory block (get_mem_detect_end()). 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the kernel is able to support. 3. "mem=" kernel command line option which limits physical memory usage. 4. OLDMEM_BASE which is a kdump memory limit when the kernel is executed as crash kernel. 5. "hsa" size which is a memory limit when the kernel is executed during zfcp/nvme dump. Through out kernel startup and run we juggle all those values at once but that does not bring any amusement, only confusion and complexity. Unify all those values to a single one we should really care, that is our identity mapping size. Signed-off-by: Vasily Gorbik Reviewed-by: Alexander Gordeev Acked-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/boot/boot.h | 11 ++++++---- arch/s390/boot/ipl_parm.c | 44 ++++++++++++-------------------------- arch/s390/boot/kaslr.c | 3 +-- arch/s390/boot/mem_detect.c | 13 ++++++------ arch/s390/boot/startup.c | 48 ++++++++++++++++++++++++++++++++++++++---- arch/s390/include/asm/setup.h | 4 +--- arch/s390/kernel/setup.c | 37 +++++++++++++++----------------- arch/s390/mm/dump_pagetables.c | 2 +- arch/s390/mm/kasan_init.c | 21 ++++++++++++------ drivers/s390/char/sclp_cmd.c | 6 +++--- 10 files changed, 109 insertions(+), 80 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 4d4536299789..d5cf2e8c5eb4 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -2,20 +2,23 @@ #ifndef BOOT_BOOT_H #define BOOT_BOOT_H +#include + void startup_kernel(void); -void detect_memory(void); +unsigned long detect_memory(void); +bool is_ipl_block_dump(void); void store_ipl_parmblock(void); void setup_boot_command_line(void); void parse_boot_command_line(void); -void setup_memory_end(void); void verify_facilities(void); void print_missing_facilities(void); void print_pgm_check_info(void); unsigned long get_random_base(unsigned long safe_addr); -extern int kaslr_enabled; -extern int vmalloc_size_set; extern const char kernel_version[]; +extern unsigned long memory_limit; +extern int vmalloc_size_set; +extern int kaslr_enabled; unsigned long read_ipl_report(unsigned long safe_offset); diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 33f43a7d03f3..d372a45fe10e 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -17,10 +17,9 @@ int __bootdata_preserved(ipl_block_valid); unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL; unsigned long __bootdata(vmalloc_size) = VMALLOC_DEFAULT_SIZE; -unsigned long __bootdata(memory_end); -int __bootdata(memory_end_set); int __bootdata(noexec_disabled); +unsigned long memory_limit; int vmalloc_size_set; int kaslr_enabled; @@ -58,6 +57,17 @@ void store_ipl_parmblock(void) ipl_block_valid = 1; } +bool is_ipl_block_dump(void) +{ + if (ipl_block.pb0_hdr.pbt == IPL_PBT_FCP && + ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) + return true; + if (ipl_block.pb0_hdr.pbt == IPL_PBT_NVME && + ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP) + return true; + return false; +} + static size_t scpdata_length(const u8 *buf, size_t count) { while (count) { @@ -238,10 +248,8 @@ void parse_boot_command_line(void) while (*args) { args = next_arg(args, ¶m, &val); - if (!strcmp(param, "mem") && val) { - memory_end = round_down(memparse(val, NULL), PAGE_SIZE); - memory_end_set = 1; - } + if (!strcmp(param, "mem") && val) + memory_limit = round_down(memparse(val, NULL), PAGE_SIZE); if (!strcmp(param, "vmalloc") && val) { vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE); @@ -282,27 +290,3 @@ void parse_boot_command_line(void) #endif } } - -static inline bool is_ipl_block_dump(void) -{ - if (ipl_block.pb0_hdr.pbt == IPL_PBT_FCP && - ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) - return true; - if (ipl_block.pb0_hdr.pbt == IPL_PBT_NVME && - ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP) - return true; - return false; -} - -void setup_memory_end(void) -{ -#ifdef CONFIG_CRASH_DUMP - if (OLDMEM_BASE) { - kaslr_enabled = 0; - } else if (ipl_block_valid && is_ipl_block_dump()) { - kaslr_enabled = 0; - if (!sclp_early_get_hsa_size(&memory_end) && memory_end) - memory_end_set = 1; - } -#endif -} diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index 987a9eaf228a..0dd48fbdbaa4 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -177,8 +177,7 @@ unsigned long get_random_base(unsigned long safe_addr) unsigned long kasan_needs; int i; - if (memory_end_set) - memory_limit = min(memory_limit, memory_end); + memory_limit = min(memory_limit, ident_map_size); /* * Avoid putting kernel in the end of physical memory diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c index 62e7c13ce85c..40168e59abd3 100644 --- a/arch/s390/boot/mem_detect.c +++ b/arch/s390/boot/mem_detect.c @@ -8,7 +8,6 @@ #include "compressed/decompressor.h" #include "boot.h" -unsigned long __bootdata(max_physmem_end); struct mem_detect_info __bootdata(mem_detect); /* up to 256 storage elements, 1020 subincrements each */ @@ -149,27 +148,29 @@ static void search_mem_end(void) add_mem_detect_block(0, (offset + 1) << 20); } -void detect_memory(void) +unsigned long detect_memory(void) { + unsigned long max_physmem_end; + sclp_early_get_memsize(&max_physmem_end); if (!sclp_early_read_storage_info()) { mem_detect.info_source = MEM_DETECT_SCLP_STOR_INFO; - return; + return max_physmem_end; } if (!diag260()) { mem_detect.info_source = MEM_DETECT_DIAG260; - return; + return max_physmem_end; } if (max_physmem_end) { add_mem_detect_block(0, max_physmem_end); mem_detect.info_source = MEM_DETECT_SCLP_READ_INFO; - return; + return max_physmem_end; } search_mem_end(); mem_detect.info_source = MEM_DETECT_BIN_SEARCH; - max_physmem_end = get_mem_detect_end(); + return get_mem_detect_end(); } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index eeda91fcf7cc..05f8eefa3dcf 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include @@ -14,6 +15,7 @@ extern char __boot_data_start[], __boot_data_end[]; extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; unsigned long __bootdata_preserved(__kaslr_offset); +unsigned long __bootdata(ident_map_size); /* * Some code and data needs to stay below 2 GB, even when the kernel would be @@ -127,6 +129,46 @@ static void handle_relocs(unsigned long offset) } } +/* + * Merge information from several sources into a single ident_map_size value. + * "ident_map_size" represents the upper limit of physical memory we may ever + * reach. It might not be all online memory, but also include standby (offline) + * memory. "ident_map_size" could be lower then actual standby or even online + * memory present, due to limiting factors. We should never go above this limit. + * It is the size of our identity mapping. + * + * Consider the following factors: + * 1. max_physmem_end - end of physical memory online or standby. + * Always <= end of the last online memory block (get_mem_detect_end()). + * 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the + * kernel is able to support. + * 3. "mem=" kernel command line option which limits physical memory usage. + * 4. OLDMEM_BASE which is a kdump memory limit when the kernel is executed as + * crash kernel. + * 5. "hsa" size which is a memory limit when the kernel is executed during + * zfcp/nvme dump. + */ +static void setup_ident_map_size(unsigned long max_physmem_end) +{ + unsigned long hsa_size; + + ident_map_size = max_physmem_end; + if (memory_limit) + ident_map_size = min(ident_map_size, memory_limit); + ident_map_size = min(ident_map_size, 1UL << MAX_PHYSMEM_BITS); + +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE) { + kaslr_enabled = 0; + ident_map_size = min(ident_map_size, OLDMEM_SIZE); + } else if (ipl_block_valid && is_ipl_block_dump()) { + kaslr_enabled = 0; + if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) + ident_map_size = min(ident_map_size, hsa_size); + } +#endif +} + /* * This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's. */ @@ -145,8 +187,7 @@ static void setup_vmalloc_size(void) if (vmalloc_size_set) return; - size = (memory_end ?: max_physmem_end) >> 3; - size = round_up(size, _SEGMENT_SIZE); + size = round_up(ident_map_size / 8, _SEGMENT_SIZE); vmalloc_size = max(size, vmalloc_size); } @@ -165,8 +206,7 @@ void startup_kernel(void) sclp_early_read_info(); setup_boot_command_line(); parse_boot_command_line(); - setup_memory_end(); - detect_memory(); + setup_ident_map_size(detect_memory()); setup_vmalloc_size(); random_lma = __kaslr_offset = 0; diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 1be6a064f317..3e388fa208d4 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -86,10 +86,8 @@ extern unsigned int zlib_dfltcc_support; #define ZLIB_DFLTCC_FULL_DEBUG 4 extern int noexec_disabled; -extern int memory_end_set; -extern unsigned long memory_end; +extern unsigned long ident_map_size; extern unsigned long vmalloc_size; -extern unsigned long max_physmem_end; /* The Write Back bit position in the physaddr is given by the SLPC PCI */ extern unsigned long mio_wb_bit_mask; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 02bccee5ee85..756936785598 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -94,10 +94,8 @@ char elf_platform[ELF_PLATFORM_SIZE]; unsigned long int_hwcap = 0; int __bootdata(noexec_disabled); -int __bootdata(memory_end_set); -unsigned long __bootdata(memory_end); +unsigned long __bootdata(ident_map_size); unsigned long __bootdata(vmalloc_size); -unsigned long __bootdata(max_physmem_end); struct mem_detect_info __bootdata(mem_detect); struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table); @@ -557,12 +555,12 @@ static void __init setup_resources(void) #endif } -static void __init setup_memory_end(void) +static void __init setup_ident_map_size(void) { unsigned long vmax, tmp; /* Choose kernel address space layout: 3 or 4 levels. */ - tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; + tmp = ident_map_size / PAGE_SIZE; tmp = tmp * (sizeof(struct page) + PAGE_SIZE); if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE) vmax = _REGION2_SIZE; /* 3-level kernel page table */ @@ -589,22 +587,22 @@ static void __init setup_memory_end(void) tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS); vmemmap = (struct page *) tmp; - /* Take care that memory_end is set and <= vmemmap */ - memory_end = min(memory_end ?: max_physmem_end, (unsigned long)vmemmap); + /* Take care that ident_map_size <= vmemmap */ + ident_map_size = min(ident_map_size, (unsigned long)vmemmap); #ifdef CONFIG_KASAN - memory_end = min(memory_end, KASAN_SHADOW_START); + ident_map_size = min(ident_map_size, KASAN_SHADOW_START); #endif - vmemmap_size = SECTION_ALIGN_UP(memory_end / PAGE_SIZE) * sizeof(struct page); + vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page); #ifdef CONFIG_KASAN /* move vmemmap above kasan shadow only if stands in a way */ if (KASAN_SHADOW_END > (unsigned long)vmemmap && (unsigned long)vmemmap + vmemmap_size > KASAN_SHADOW_START) vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END); #endif - max_pfn = max_low_pfn = PFN_DOWN(memory_end); - memblock_remove(memory_end, ULONG_MAX); + max_pfn = max_low_pfn = PFN_DOWN(ident_map_size); + memblock_remove(ident_map_size, ULONG_MAX); - pr_notice("The maximum memory size is %luMB\n", memory_end >> 20); + pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20); } #ifdef CONFIG_CRASH_DUMP @@ -634,12 +632,11 @@ static struct notifier_block kdump_mem_nb = { #endif /* - * Make sure that the area behind memory_end is protected + * Make sure that the area above identity mapping is protected */ -static void __init reserve_memory_end(void) +static void __init reserve_above_ident_map(void) { - if (memory_end_set) - memblock_reserve(memory_end, ULONG_MAX); + memblock_reserve(ident_map_size, ULONG_MAX); } /* @@ -676,7 +673,7 @@ static void __init reserve_crashkernel(void) phys_addr_t low, high; int rc; - rc = parse_crashkernel(boot_command_line, memory_end, &crash_size, + rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size, &crash_base); crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); @@ -1130,7 +1127,7 @@ void __init setup_arch(char **cmdline_p) setup_control_program_code(); /* Do some memory reservations *before* memory is added to memblock */ - reserve_memory_end(); + reserve_above_ident_map(); reserve_oldmem(); reserve_kernel(); reserve_initrd(); @@ -1145,9 +1142,9 @@ void __init setup_arch(char **cmdline_p) remove_oldmem(); setup_uv(); - setup_memory_end(); + setup_ident_map_size(); setup_memory(); - dma_contiguous_reserve(memory_end); + dma_contiguous_reserve(ident_map_size); vmcp_cma_reserve(); check_initrd(); diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 8f9ff7e7187d..e40a30647d99 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -255,7 +255,7 @@ static int pt_dump_init(void) */ max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2; max_addr = 1UL << (max_addr * 11 + 31); - address_markers[IDENTITY_AFTER_END_NR].start_address = memory_end; + address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size; address_markers[MODULES_NR].start_address = MODULES_VADDR; address_markers[MODULES_END_NR].start_address = MODULES_END; address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index a9f561491110..db4d303aaaa9 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -289,12 +289,19 @@ void __init kasan_early_init(void) memsize = get_mem_detect_end(); if (!memsize) kasan_early_panic("cannot detect physical memory size\n"); - /* respect mem= cmdline parameter */ - if (memory_end_set && memsize > memory_end) - memsize = memory_end; - if (IS_ENABLED(CONFIG_CRASH_DUMP) && OLDMEM_BASE) - memsize = min(memsize, OLDMEM_SIZE); - memsize = min(memsize, KASAN_SHADOW_START); + /* + * Kasan currently supports standby memory but only if it follows + * online memory (default allocation), i.e. no memory holes. + * - memsize represents end of online memory + * - ident_map_size represents online + standby and memory limits + * accounted. + * Kasan maps "memsize" right away. + * [0, memsize] - as identity mapping + * [__sha(0), __sha(memsize)] - shadow memory for identity mapping + * The rest [memsize, ident_map_size] if memsize < ident_map_size + * could be mapped/unmapped dynamically later during memory hotplug. + */ + memsize = min(memsize, ident_map_size); BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE)); BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE)); @@ -377,7 +384,7 @@ void __init kasan_early_init(void) POPULATE_SHALLOW); } /* populate kasan shadow for untracked memory */ - kasan_early_pgtable_populate(__sha(max_physmem_end), __sha(untracked_mem_end), + kasan_early_pgtable_populate(__sha(ident_map_size), __sha(untracked_mem_end), POPULATE_ZERO_SHADOW); kasan_early_pgtable_populate(__sha(kasan_vmax), __sha(vmax_unlimited), POPULATE_ZERO_SHADOW); diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 7ebe89890a9b..d41bc144c183 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -401,10 +401,10 @@ static void __init add_memory_merged(u16 rn) goto skip_add; if (start + size > VMEM_MAX_PHYS) size = VMEM_MAX_PHYS - start; - if (memory_end_set && (start >= memory_end)) + if (start >= ident_map_size) goto skip_add; - if (memory_end_set && (start + size > memory_end)) - size = memory_end - start; + if (start + size > ident_map_size) + size = ident_map_size - start; block_size = memory_block_size_bytes(); align_to_block_size(&start, &size, block_size); if (!size) -- cgit v1.2.3-70-g09d2 From c9343637d6b265127dfe37cd7e09eb6feac03032 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 12 Nov 2020 15:54:47 +0100 Subject: s390/ftrace: assume -mhotpatch or -mrecord-mcount always available Currently the kernel minimal compiler requirement is gcc 4.9 or clang 10.0.1. * gcc -mhotpatch option is supported since 4.8. * A combination of -pg -mrecord-mcount -mnop-mcount -mfentry flags is supported since gcc 9 and since clang 10. Drop support for old -pg function prologues. Which leaves binary compatible -mhotpatch / -mnop-mcount -mfentry prologues in a form: brcl 0,0 Which are also do not require initial nop optimization / conversion and presence of _mcount symbol. Signed-off-by: Vasily Gorbik Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/include/asm/ftrace.h | 31 +++++++-------------- arch/s390/kernel/ftrace.c | 63 ++++++++++-------------------------------- arch/s390/kernel/mcount.S | 8 ------ scripts/recordmcount.pl | 3 -- 4 files changed, 24 insertions(+), 81 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 68d362f8d6c1..695c61989f97 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -2,16 +2,9 @@ #ifndef _ASM_S390_FTRACE_H #define _ASM_S390_FTRACE_H +#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR #define ARCH_SUPPORTS_FTRACE_OPS 1 - -#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT) #define MCOUNT_INSN_SIZE 6 -#else -#define MCOUNT_INSN_SIZE 24 -#define MCOUNT_RETURN_FIXUP 18 -#endif - -#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR #ifndef __ASSEMBLY__ @@ -22,7 +15,6 @@ #define ftrace_return_address(n) __builtin_return_address(n) #endif -void _mcount(void); void ftrace_caller(void); extern char ftrace_graph_caller_end; @@ -30,12 +22,20 @@ extern unsigned long ftrace_plt; struct dyn_arch_ftrace { }; -#define MCOUNT_ADDR ((unsigned long)_mcount) +#define MCOUNT_ADDR 0 #define FTRACE_ADDR ((unsigned long)ftrace_caller) #define KPROBE_ON_FTRACE_NOP 0 #define KPROBE_ON_FTRACE_CALL 1 +struct module; +struct dyn_ftrace; +/* + * Either -mhotpatch or -mnop-mcount is used - no explicit init is required + */ +static inline int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) { return 0; } +#define ftrace_init_nop ftrace_init_nop + static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; @@ -49,28 +49,17 @@ struct ftrace_insn { static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn) { #ifdef CONFIG_FUNCTION_TRACER -#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT) /* brcl 0,0 */ insn->opc = 0xc004; insn->disp = 0; -#else - /* jg .+24 */ - insn->opc = 0xc0f4; - insn->disp = MCOUNT_INSN_SIZE / 2; -#endif #endif } static inline int is_ftrace_nop(struct ftrace_insn *insn) { #ifdef CONFIG_FUNCTION_TRACER -#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT) if (insn->disp == 0) return 1; -#else - if (insn->disp == MCOUNT_INSN_SIZE / 2) - return 1; -#endif #endif return 0; } diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index b388e87a08bf..ebc1284a618b 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -22,56 +22,26 @@ #include "entry.h" /* - * The mcount code looks like this: - * stg %r14,8(%r15) # offset 0 - * larl %r1,<&counter> # offset 6 - * brasl %r14,_mcount # offset 12 - * lg %r14,8(%r15) # offset 18 - * Total length is 24 bytes. Only the first instruction will be patched - * by ftrace_make_call / ftrace_make_nop. - * The enabled ftrace code block looks like this: + * To generate function prologue either gcc's hotpatch feature (since gcc 4.8) + * or a combination of -pg -mrecord-mcount -mnop-mcount -mfentry flags + * (since gcc 9 / clang 10) is used. + * In both cases the original and also the disabled function prologue contains + * only a single six byte instruction and looks like this: + * > brcl 0,0 # offset 0 + * To enable ftrace the code gets patched like above and afterwards looks + * like this: * > brasl %r0,ftrace_caller # offset 0 - * larl %r1,<&counter> # offset 6 - * brasl %r14,_mcount # offset 12 - * lg %r14,8(%r15) # offset 18 + * + * The instruction will be patched by ftrace_make_call / ftrace_make_nop. * The ftrace function gets called with a non-standard C function call ABI * where r0 contains the return address. It is also expected that the called * function only clobbers r0 and r1, but restores r2-r15. * For module code we can't directly jump to ftrace caller, but need a * trampoline (ftrace_plt), which clobbers also r1. - * The return point of the ftrace function has offset 24, so execution - * continues behind the mcount block. - * The disabled ftrace code block looks like this: - * > jg .+24 # offset 0 - * larl %r1,<&counter> # offset 6 - * brasl %r14,_mcount # offset 12 - * lg %r14,8(%r15) # offset 18 - * The jg instruction branches to offset 24 to skip as many instructions - * as possible. - * In case we use gcc's hotpatch feature the original and also the disabled - * function prologue contains only a single six byte instruction and looks - * like this: - * > brcl 0,0 # offset 0 - * To enable ftrace the code gets patched like above and afterwards looks - * like this: - * > brasl %r0,ftrace_caller # offset 0 */ unsigned long ftrace_plt; -static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn) -{ -#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT) - /* brcl 0,0 */ - insn->opc = 0xc004; - insn->disp = 0; -#else - /* stg r14,8(r15) */ - insn->opc = 0xe3e0; - insn->disp = 0xf0080024; -#endif -} - int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { @@ -85,15 +55,10 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; - if (addr == MCOUNT_ADDR) { - /* Initial code replacement */ - ftrace_generate_orig_insn(&orig); - ftrace_generate_nop_insn(&new); - } else { - /* Replace ftrace call with a nop. */ - ftrace_generate_call_insn(&orig, rec->ip); - ftrace_generate_nop_insn(&new); - } + /* Replace ftrace call with a nop. */ + ftrace_generate_call_insn(&orig, rec->ip); + ftrace_generate_nop_insn(&new); + /* Verify that the to be replaced code matches what we expect. */ if (memcmp(&orig, &old, sizeof(old))) return -EINVAL; diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 7458dcfd6464..faf64c2f90f5 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -33,11 +33,6 @@ ENDPROC(ftrace_stub) #define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD #endif -ENTRY(_mcount) - BR_EX %r14 -ENDPROC(_mcount) -EXPORT_SYMBOL(_mcount) - ENTRY(ftrace_caller) .globl ftrace_regs_caller .set ftrace_regs_caller,ftrace_caller @@ -46,9 +41,6 @@ ENTRY(ftrace_caller) ipm %r14 # don't put any instructions sllg %r14,%r14,16 # clobbering CC before this point lgr %r1,%r15 -#if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)) - aghi %r0,MCOUNT_RETURN_FIXUP -#endif # allocate stack frame for ftrace_caller to contain traced function aghi %r15,-TRACED_FUNC_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 3f77a5d695c1..56c801502b9a 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -254,9 +254,6 @@ if ($arch eq "x86_64") { if ($cc =~ /-DCC_USING_HOTPATCH/) { $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*brcl\\s*0,[0-9a-f]+ <([^\+]*)>\$"; $mcount_adjust = 0; - } else { - $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$"; - $mcount_adjust = -14; } $alignment = 8; $type = ".quad"; -- cgit v1.2.3-70-g09d2 From 9a78c70a1ba03cb4a8fb96964c6ee77236dd487b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 10 Nov 2020 17:05:26 +0100 Subject: s390/decompressor: add decompressor_printk The decompressor does not have any special debug means. Running the kernel under qemu with gdb is helpful but tedious exercise if done repeatedly. It is also not applicable to debugging under LPAR and z/VM. One special thing which stands out is a working sclp_early_printk, which could be used once the kernel switches to 64-bit addressing mode. But sclp_early_printk does not provide any string formating capabilities. Formatting and printing string without printk-alike function is a not fun. The lack of printk-alike function means people would save up on testing and introduce more bugs. So, finally, provide decompressor_printk function, which fits on one screen and trades features for simplicity. It only supports "%s", "%x" and "%lx" specifiers and zero padding for hex values. Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/boot/boot.h | 3 + arch/s390/boot/pgm_check_info.c | 143 ++++++++++++++++++---------------------- 2 files changed, 68 insertions(+), 78 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index d5cf2e8c5eb4..ca485bfa4e50 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -4,6 +4,8 @@ #include +#include + void startup_kernel(void); unsigned long detect_memory(void); bool is_ipl_block_dump(void); @@ -14,6 +16,7 @@ void verify_facilities(void); void print_missing_facilities(void); void print_pgm_check_info(void); unsigned long get_random_base(unsigned long safe_addr); +void __printf(1, 2) decompressor_printk(const char *fmt, ...); extern const char kernel_version[]; extern unsigned long memory_limit; diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index a3c9862bcede..829548c22444 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -1,99 +1,86 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include +#include #include "boot.h" const char hex_asc[] = "0123456789abcdef"; -#define add_val_as_hex(dst, val) \ - __add_val_as_hex(dst, (const unsigned char *)&val, sizeof(val)) - -static char *__add_val_as_hex(char *dst, const unsigned char *src, size_t count) +static char *as_hex(char *dst, unsigned long val, int pad) { - while (count--) - dst = hex_byte_pack(dst, *src++); - return dst; -} + char *p, *end = p = dst + max(pad, (int)__fls(val | 1) / 4 + 1); -static char *add_str(char *dst, char *src) -{ - strcpy(dst, src); - return dst + strlen(dst); + for (*p-- = 0; p >= dst; val >>= 4) + *p-- = hex_asc[val & 0x0f]; + return end; } -void print_pgm_check_info(void) +void decompressor_printk(const char *fmt, ...) { - struct psw_bits *psw = &psw_bits(S390_lowcore.psw_save_area); - unsigned short ilc = S390_lowcore.pgm_ilc >> 1; - char buf[256]; - int row, col; - char *p; + char buf[1024] = { 0 }; + char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */ + unsigned long pad; + char *p = buf; + va_list args; - add_str(buf, "Linux version "); - strlcat(buf, kernel_version, sizeof(buf) - 1); - strlcat(buf, "\n", sizeof(buf)); - sclp_early_printk(buf); - - p = add_str(buf, "Kernel fault: interruption code "); - p = add_val_as_hex(buf + strlen(buf), S390_lowcore.pgm_code); - p = add_str(p, " ilc:"); - *p++ = hex_asc_lo(ilc); - add_str(p, "\n"); - sclp_early_printk(buf); - - if (kaslr_enabled) { - p = add_str(buf, "Kernel random base: "); - p = add_val_as_hex(p, __kaslr_offset); - add_str(p, "\n"); - sclp_early_printk(buf); + va_start(args, fmt); + for (; p < end && *fmt; fmt++) { + if (*fmt != '%') { + *p++ = *fmt; + continue; + } + pad = isdigit(*++fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0; + switch (*fmt) { + case 's': + p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf)); + break; + case 'l': + if (*++fmt != 'x' || end - p <= max(sizeof(long) * 2, pad)) + goto out; + p = as_hex(p, va_arg(args, unsigned long), pad); + break; + case 'x': + if (end - p <= max(sizeof(int) * 2, pad)) + goto out; + p = as_hex(p, va_arg(args, unsigned int), pad); + break; + default: + goto out; + } } - - p = add_str(buf, "PSW : "); - p = add_val_as_hex(p, S390_lowcore.psw_save_area.mask); - p = add_str(p, " "); - p = add_val_as_hex(p, S390_lowcore.psw_save_area.addr); - add_str(p, "\n"); +out: + va_end(args); sclp_early_printk(buf); +} - p = add_str(buf, " R:"); - *p++ = hex_asc_lo(psw->per); - p = add_str(p, " T:"); - *p++ = hex_asc_lo(psw->dat); - p = add_str(p, " IO:"); - *p++ = hex_asc_lo(psw->io); - p = add_str(p, " EX:"); - *p++ = hex_asc_lo(psw->ext); - p = add_str(p, " Key:"); - *p++ = hex_asc_lo(psw->key); - p = add_str(p, " M:"); - *p++ = hex_asc_lo(psw->mcheck); - p = add_str(p, " W:"); - *p++ = hex_asc_lo(psw->wait); - p = add_str(p, " P:"); - *p++ = hex_asc_lo(psw->pstate); - p = add_str(p, " AS:"); - *p++ = hex_asc_lo(psw->as); - p = add_str(p, " CC:"); - *p++ = hex_asc_lo(psw->cc); - p = add_str(p, " PM:"); - *p++ = hex_asc_lo(psw->pm); - p = add_str(p, " RI:"); - *p++ = hex_asc_lo(psw->ri); - p = add_str(p, " EA:"); - *p++ = hex_asc_lo(psw->eaba); - add_str(p, "\n"); - sclp_early_printk(buf); +void print_pgm_check_info(void) +{ + unsigned long *gpregs = (unsigned long *)S390_lowcore.gpregs_save_area; + struct psw_bits *psw = &psw_bits(S390_lowcore.psw_save_area); - for (row = 0; row < 4; row++) { - p = add_str(buf, row == 0 ? "GPRS:" : " "); - for (col = 0; col < 4; col++) { - p = add_str(p, " "); - p = add_val_as_hex(p, S390_lowcore.gpregs_save_area[row * 4 + col]); - } - add_str(p, "\n"); - sclp_early_printk(buf); - } + decompressor_printk("Linux version %s\n", kernel_version); + decompressor_printk("Kernel fault: interruption code %04x ilc:%x\n", + S390_lowcore.pgm_code, S390_lowcore.pgm_ilc >> 1); + if (kaslr_enabled) + decompressor_printk("Kernel random base: %lx\n", __kaslr_offset); + decompressor_printk("PSW : %016lx %016lx\n", + S390_lowcore.psw_save_area.mask, + S390_lowcore.psw_save_area.addr); + decompressor_printk( + " R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", + psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, + psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, + psw->eaba); + decompressor_printk("GPRS: %016lx %016lx %016lx %016lx\n", + gpregs[0], gpregs[1], gpregs[2], gpregs[3]); + decompressor_printk(" %016lx %016lx %016lx %016lx\n", + gpregs[4], gpregs[5], gpregs[6], gpregs[7]); + decompressor_printk(" %016lx %016lx %016lx %016lx\n", + gpregs[8], gpregs[9], gpregs[10], gpregs[11]); + decompressor_printk(" %016lx %016lx %016lx %016lx\n", + gpregs[12], gpregs[13], gpregs[14], gpregs[15]); } -- cgit v1.2.3-70-g09d2 From ec55d1e1dbea990145644bd6838c061e8113cc4d Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 10 Nov 2020 17:05:29 +0100 Subject: s390/decompressor: correct some asm symbols annotations Use SYM_CODE_* annotations for asm functions, so that function lengths are recognized correctly. Also currently the most part of startup is marked as startup_kdump. Move misplaced startup_kdump where it belongs. $ nm -n -S arch/s390/boot/compressed/vmlinux Before: 0000000000010000 T startup 0000000000010010 T startup_kdump After: 0000000000010000 0000000000000014 T startup 0000000000010014 00000000000000b0 t startup_normal 0000000000010180 00000000000000b2 t startup_kdump Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/boot/head.S | 15 ++++++++------- arch/s390/boot/head_kdump.S | 8 ++++---- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index bc34ad2c2e49..cf70917f98e8 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -279,8 +279,8 @@ iplstart: # or linload or SALIPL # .org 0x10000 -ENTRY(startup) - j .Lep_startup_normal +SYM_CODE_START(startup) + j startup_normal .org EP_OFFSET # # This is a list of s390 kernel entry points. At address 0x1000f the number of @@ -294,9 +294,9 @@ ENTRY(startup) # kdump startup-code at 0x10010, running in 64 bit absolute addressing mode # .org 0x10010 -ENTRY(startup_kdump) - j .Lep_startup_kdump -.Lep_startup_normal: + j startup_kdump +SYM_CODE_END(startup) +SYM_CODE_START_LOCAL(startup_normal) mvi __LC_AR_MODE_ID,1 # set esame flag slr %r0,%r0 # set cpuid to zero lhi %r1,2 # mode 2 = esame (dump) @@ -322,6 +322,7 @@ ENTRY(startup_kdump) l %r15,.Lstack-.LPG0(%r13) brasl %r14,verify_facilities brasl %r14,startup_kernel +SYM_CODE_END(startup_normal) .Lstack: .long 0x8000 + (1<<(PAGE_SHIFT+BOOT_STACK_ORDER)) - STACK_FRAME_OVERHEAD @@ -371,7 +372,7 @@ ENTRY(startup_kdump) # It simply saves general/control registers and psw in # the save area and does disabled wait with a faulty address. # -ENTRY(startup_pgm_check_handler) +SYM_CODE_START_LOCAL(startup_pgm_check_handler) stmg %r8,%r15,__LC_SAVE_AREA_SYNC la %r8,4095 stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r8) @@ -390,9 +391,9 @@ ENTRY(startup_pgm_check_handler) la %r8,4095 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8) lpswe __LC_RETURN_PSW # disabled wait +SYM_CODE_END(startup_pgm_check_handler) .Ldump_info_stack: .long 0x5000 + PAGE_SIZE - STACK_FRAME_OVERHEAD -ENDPROC(startup_pgm_check_handler) # # params at 10400 (setup.h) diff --git a/arch/s390/boot/head_kdump.S b/arch/s390/boot/head_kdump.S index 174d6959bf5b..f015469e7db9 100644 --- a/arch/s390/boot/head_kdump.S +++ b/arch/s390/boot/head_kdump.S @@ -19,8 +19,7 @@ # Note: This code has to be position independent # -.align 2 -.Lep_startup_kdump: +SYM_CODE_START_LOCAL(startup_kdump) lhi %r1,2 # mode 2 = esame (dump) sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to esame mode sam64 # Switch to 64 bit addressing @@ -87,14 +86,15 @@ startup_kdump_relocated: basr %r13,0 0: lpswe .Lrestart_psw-0b(%r13) # Start new kernel... +SYM_CODE_END(startup_kdump) .align 8 .Lrestart_psw: .quad 0x0000000080000000,0x0000000000000000 + startup #else -.align 2 -.Lep_startup_kdump: +SYM_CODE_START_LOCAL(startup_kdump) larl %r13,startup_kdump_crash lpswe 0(%r13) +SYM_CODE_END(startup_kdump) .align 8 startup_kdump_crash: .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash -- cgit v1.2.3-70-g09d2 From 246218962e2175cd1dfa1e5467e9bffae5cc7b5e Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 11 Nov 2020 10:59:40 +0100 Subject: s390/decompressor: add symbols support Information printed by print_pgm_check_info() is crucial for debugging decompressor problems. Printing instruction addresses is better than nothing, but turns further debugging into tedious job of figuring out which function those addresses correspond to. This change adds simplistic symbols resolution support. And adds %pS format specifier support to decompressor_printk(). Decompressor symbols list is extracted and sorted with nm -n -S: ... 0000000000010000 0000000000000014 T startup 0000000000010014 00000000000000b0 t startup_normal 0000000000010180 00000000000000b2 t startup_kdump ... Then functions are filtered and contracted to a form: "10000 14 startup\0""10014 b0 startup_normal\0""10180 b2 startup_kdump\0" ... Which makes it trivial to find beginning of an entry and names are 0 terminated, so could be used as is. Symbols are binary-searched. To get symbols list with final addresses and then get it into the decompressor's image the same trick as for kallsyms is used. Decompressor's vmlinux is linked twice. Symbols are stored in .decompressor.syms section, current size is about 2kb. Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/boot/compressed/.gitignore | 1 + arch/s390/boot/compressed/Makefile | 24 ++++++++++-- arch/s390/boot/compressed/vmlinux.lds.S | 8 ++++ arch/s390/boot/pgm_check_info.c | 69 ++++++++++++++++++++++++++++++++- 4 files changed, 97 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore index 765a08f1bd77..01d93832cf4a 100644 --- a/arch/s390/boot/compressed/.gitignore +++ b/arch/s390/boot/compressed/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only vmlinux vmlinux.lds +vmlinux.syms diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index b235ed95a3d8..e0502222706a 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -10,21 +10,39 @@ GCOV_PROFILE := n UBSAN_SANITIZE := n KASAN_SANITIZE := n -obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) piggy.o info.o +obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o +obj-all := $(obj-y) piggy.o syms.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 -targets += info.bin $(obj-y) +targets += info.bin syms.bin vmlinux.syms $(obj-all) KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) OBJCOPYFLAGS := OBJECTS := $(addprefix $(obj)/,$(obj-y)) +OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all)) LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T -$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) FORCE +$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS_ALL) FORCE $(call if_changed,ld) +LDFLAGS_vmlinux.syms := --oformat $(LD_BFD) -e startup -T +$(obj)/vmlinux.syms: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) FORCE + $(call if_changed,ld) + +quiet_cmd_dumpsyms = DUMPSYMS $< +define cmd_dumpsyms + $(NM) -n -S --format=bsd "$<" | $(PERL) -ne '/(\w+)\s+(\w+)\s+[tT]\s+(\w+)/ and printf "%x %x %s\0",hex $$1,hex $$2,$$3' > "$@" +endef + +$(obj)/syms.bin: $(obj)/vmlinux.syms FORCE + $(call if_changed,dumpsyms) + +OBJCOPYFLAGS_syms.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.decompressor.syms +$(obj)/syms.o: $(obj)/syms.bin FORCE + $(call if_changed,objcopy) + OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load $(obj)/info.bin: vmlinux FORCE $(call if_changed,objcopy) diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index 9427e2cd0c15..152ac6f875c0 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -82,6 +82,14 @@ SECTIONS *(.vmlinux.info) } + .decompressor.syms : { + . += 1; /* make sure we have \0 before the first entry */ + . = ALIGN(2); + _decompressor_syms_start = .; + *(.decompressor.syms) + _decompressor_syms_end = .; + } + #ifdef CONFIG_KERNEL_UNCOMPRESSED . = 0x100000; #else diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index 829548c22444..8ba8136adb4d 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -19,6 +19,65 @@ static char *as_hex(char *dst, unsigned long val, int pad) return end; } +static char *symstart(char *p) +{ + while (*p) + p--; + return p + 1; +} + +extern char _decompressor_syms_start[], _decompressor_syms_end[]; +static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned short *len) +{ + /* symbol entries are in a form "10000 c4 startup\0" */ + char *a = _decompressor_syms_start; + char *b = _decompressor_syms_end; + unsigned long start; + unsigned long size; + char *pivot; + char *endp; + + while (a < b) { + pivot = symstart(a + (b - a) / 2); + start = simple_strtoull(pivot, &endp, 16); + size = simple_strtoull(endp + 1, &endp, 16); + if (ip < start) { + b = pivot; + continue; + } + if (ip > start + size) { + a = pivot + strlen(pivot) + 1; + continue; + } + *off = ip - start; + *len = size; + return endp + 1; + } + return NULL; +} + +static noinline char *strsym(void *ip) +{ + static char buf[64]; + unsigned short off; + unsigned short len; + char *p; + + p = findsym((unsigned long)ip, &off, &len); + if (p) { + strncpy(buf, p, sizeof(buf)); + /* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */ + p = buf + strnlen(buf, sizeof(buf) - 15); + strcpy(p, "+0x"); + p = as_hex(p + 3, off, 0); + strcpy(p, "/0x"); + as_hex(p + 3, len, 0); + } else { + as_hex(buf, (unsigned long)ip, 16); + } + return buf; +} + void decompressor_printk(const char *fmt, ...) { char buf[1024] = { 0 }; @@ -38,6 +97,11 @@ void decompressor_printk(const char *fmt, ...) case 's': p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf)); break; + case 'p': + if (*++fmt != 'S') + goto out; + p = buf + strlcat(buf, strsym(va_arg(args, void *)), sizeof(buf)); + break; case 'l': if (*++fmt != 'x' || end - p <= max(sizeof(long) * 2, pad)) goto out; @@ -67,9 +131,10 @@ void print_pgm_check_info(void) S390_lowcore.pgm_code, S390_lowcore.pgm_ilc >> 1); if (kaslr_enabled) decompressor_printk("Kernel random base: %lx\n", __kaslr_offset); - decompressor_printk("PSW : %016lx %016lx\n", + decompressor_printk("PSW : %016lx %016lx (%pS)\n", S390_lowcore.psw_save_area.mask, - S390_lowcore.psw_save_area.addr); + S390_lowcore.psw_save_area.addr, + (void *)S390_lowcore.psw_save_area.addr); decompressor_printk( " R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, -- cgit v1.2.3-70-g09d2 From 8977ab65b894d889c3919581bf545ed106a9a1a5 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 10 Nov 2020 17:05:35 +0100 Subject: s390/decompressor: add stacktrace support Decompressor works on a single statically allocated stack. Stacktrace implementation with -mbackchain just takes few lines of code. Linux version 5.10.0-rc3-22793-g0f84a355b776-dirty (gor@tuxmaker) #27 SMP PREEMPT Mon Nov 9 17:30:18 CET 2020 Kernel fault: interruption code 0005 ilc:2 PSW : 0000000180000000 0000000000012f92 (parse_boot_command_line+0x27a/0x46c) R:0 T:0 IO:0 EX:0 Key:0 M:0 W:0 P:0 AS:0 CC:0 PM:0 RI:0 EA:3 GPRS: 0000000000000000 00ffffffffffffff 0000000000000000 000000000001a62c 000000000000bf60 0000000000000000 00000000000003c0 0000000000000000 0000000000000080 000000000002322d 000000007f29ef20 0000000000efd018 000000000311c000 0000000000010070 0000000000012f82 000000000000bea8 Call Trace: (sp:000000000000bea8 [<000000000002016e>] 000000000002016e) sp:000000000000bf18 [<0000000000012408>] startup_kernel+0x88/0x2fc sp:000000000000bf60 [<00000000000100c4>] startup_normal+0xb0/0xb0 Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/Makefile | 2 +- arch/s390/boot/boot.h | 5 +++++ arch/s390/boot/head.S | 3 ++- arch/s390/boot/pgm_check_info.c | 23 +++++++++++++++++++++++ arch/s390/include/asm/thread_info.h | 2 +- 5 files changed, 32 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/Makefile b/arch/s390/Makefile index ba94b03c8b2f..8db267d2a543 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -25,7 +25,7 @@ KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY -KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float +KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member) diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index ca485bfa4e50..8b50967f5804 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -4,6 +4,10 @@ #include +#define BOOT_STACK_OFFSET 0x8000 + +#ifndef __ASSEMBLY__ + #include void startup_kernel(void); @@ -25,4 +29,5 @@ extern int kaslr_enabled; unsigned long read_ipl_report(unsigned long safe_offset); +#endif /* __ASSEMBLY__ */ #endif /* BOOT_BOOT_H */ diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index cf70917f98e8..dacb7813f982 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -28,6 +28,7 @@ #include #include #include +#include "boot.h" #define ARCH_OFFSET 4 @@ -325,7 +326,7 @@ SYM_CODE_START_LOCAL(startup_normal) SYM_CODE_END(startup_normal) .Lstack: - .long 0x8000 + (1<<(PAGE_SHIFT+BOOT_STACK_ORDER)) - STACK_FRAME_OVERHEAD + .long BOOT_STACK_OFFSET + BOOT_STACK_SIZE - STACK_FRAME_OVERHEAD .align 8 6: .long 0x7fffffff,0xffffffff .Lext_new_psw: diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index 8ba8136adb4d..d8bfc5673956 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -121,6 +122,27 @@ out: sclp_early_printk(buf); } +static noinline void print_stacktrace(void) +{ + struct stack_info boot_stack = { STACK_TYPE_TASK, BOOT_STACK_OFFSET, + BOOT_STACK_OFFSET + BOOT_STACK_SIZE }; + unsigned long sp = S390_lowcore.gpregs_save_area[15]; + bool first = true; + + decompressor_printk("Call Trace:\n"); + while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) { + struct stack_frame *sf = (struct stack_frame *)sp; + + decompressor_printk(first ? "(sp:%016lx [<%016lx>] %pS)\n" : + " sp:%016lx [<%016lx>] %pS\n", + sp, sf->gprs[8], (void *)sf->gprs[8]); + if (sf->back_chain <= sp) + break; + sp = sf->back_chain; + first = false; + } +} + void print_pgm_check_info(void) { unsigned long *gpregs = (unsigned long *)S390_lowcore.gpregs_save_area; @@ -148,4 +170,5 @@ void print_pgm_check_info(void) gpregs[8], gpregs[9], gpregs[10], gpregs[11]); decompressor_printk(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); + print_stacktrace(); } diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 13a04fcf7762..ce788f3e534d 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -18,7 +18,7 @@ #else #define THREAD_SIZE_ORDER 2 #endif -#define BOOT_STACK_ORDER 2 +#define BOOT_STACK_SIZE (PAGE_SIZE << 2) #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #ifndef __ASSEMBLY__ -- cgit v1.2.3-70-g09d2 From ba1a6be994e8444baf23c14d7be045811197ccb3 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 10 Nov 2020 23:30:23 +0100 Subject: s390/decompressor: print cmdline and BEAR on pgm_check Add kernel command line and last breaking event. The kernel command line is taken from early_command_line and printed only if kernel is not running as protected virtualization guest and if it has been already initialized from the "COMMAND_LINE". Linux version 5.10.0-rc3-22794-gecaa72788df0-dirty (gor@tuxmaker) #28 SMP PREEMPT Mon Nov 9 17:41:20 CET 2020 Kernel command line: audit_enable=0 audit=0 selinux=0 crashkernel=296M root=/dev/dasda1 dasd=ec5b memblock=debug die Kernel fault: interruption code 0005 ilc:2 PSW : 0000000180000000 0000000000012f92 (parse_boot_command_line+0x27a/0x46c) R:0 T:0 IO:0 EX:0 Key:0 M:0 W:0 P:0 AS:0 CC:0 PM:0 RI:0 EA:3 GPRS: 0000000000000000 00ffffffffffffff 0000000000000000 000000000001a65c 000000000000bf60 0000000000000000 00000000000003c0 0000000000000000 0000000000000080 000000000002322d 000000007f29ef20 0000000000efd018 000000000311c000 0000000000010070 0000000000012f82 000000000000bea8 Call Trace: (sp:000000000000bea8 [<000000000002016e>] 000000000002016e) sp:000000000000bf18 [<0000000000012408>] startup_kernel+0x88/0x2fc sp:000000000000bf60 [<00000000000100c4>] startup_normal+0xb0/0xb0 Last Breaking-Event-Address: [<00000000000135ba>] strcmp+0x22/0x24 Reviewed-by: Alexander Egorenkov Acked-by: Viktor Mihajlovski Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/boot/pgm_check_info.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index d8bfc5673956..3a46abed2549 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -3,9 +3,11 @@ #include #include #include +#include #include #include #include +#include #include #include "boot.h" @@ -149,6 +151,8 @@ void print_pgm_check_info(void) struct psw_bits *psw = &psw_bits(S390_lowcore.psw_save_area); decompressor_printk("Linux version %s\n", kernel_version); + if (!is_prot_virt_guest() && early_command_line[0]) + decompressor_printk("Kernel command line: %s\n", early_command_line); decompressor_printk("Kernel fault: interruption code %04x ilc:%x\n", S390_lowcore.pgm_code, S390_lowcore.pgm_ilc >> 1); if (kaslr_enabled) @@ -171,4 +175,7 @@ void print_pgm_check_info(void) decompressor_printk(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); print_stacktrace(); + decompressor_printk("Last Breaking-Event-Address:\n"); + decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.breaking_event_addr, + (void *)S390_lowcore.breaking_event_addr); } -- cgit v1.2.3-70-g09d2 From 074ff04e279ab8a3f97fefd1b8a313b1e4f04e9b Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 11 Nov 2020 08:13:14 +0100 Subject: s390/stp: let subsys_system_register() sysfs attributes Instead of creating the sysfs attributes for the stp root_dev by hand, pass them to subsys_system_register() as parameter. This also ensures that the attributes are available when the KOBJ_ADD event is raised. Signed-off-by: Julian Wiedmann Signed-off-by: Heiko Carstens --- arch/s390/kernel/time.c | 44 ++++++++++++++------------------------------ 1 file changed, 14 insertions(+), 30 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 0ac30ee2c633..c59cb44fbb7d 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -927,41 +927,25 @@ static ssize_t online_store(struct device *dev, */ static DEVICE_ATTR_RW(online); -static struct device_attribute *stp_attributes[] = { - &dev_attr_ctn_id, - &dev_attr_ctn_type, - &dev_attr_dst_offset, - &dev_attr_leap_seconds, - &dev_attr_online, - &dev_attr_leap_seconds_scheduled, - &dev_attr_stratum, - &dev_attr_time_offset, - &dev_attr_time_zone_offset, - &dev_attr_timing_mode, - &dev_attr_timing_state, +static struct attribute *stp_dev_attrs[] = { + &dev_attr_ctn_id.attr, + &dev_attr_ctn_type.attr, + &dev_attr_dst_offset.attr, + &dev_attr_leap_seconds.attr, + &dev_attr_online.attr, + &dev_attr_leap_seconds_scheduled.attr, + &dev_attr_stratum.attr, + &dev_attr_time_offset.attr, + &dev_attr_time_zone_offset.attr, + &dev_attr_timing_mode.attr, + &dev_attr_timing_state.attr, NULL }; +ATTRIBUTE_GROUPS(stp_dev); static int __init stp_init_sysfs(void) { - struct device_attribute **attr; - int rc; - - rc = subsys_system_register(&stp_subsys, NULL); - if (rc) - goto out; - for (attr = stp_attributes; *attr; attr++) { - rc = device_create_file(stp_subsys.dev_root, *attr); - if (rc) - goto out_unreg; - } - return 0; -out_unreg: - for (; attr >= stp_attributes; attr--) - device_remove_file(stp_subsys.dev_root, *attr); - bus_unregister(&stp_subsys); -out: - return rc; + return subsys_system_register(&stp_subsys, stp_dev_groups); } device_initcall(stp_init_sysfs); -- cgit v1.2.3-70-g09d2 From af71657c153fc18d7bcd35a3d5240fbd1ac7ebbd Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 10 Nov 2020 10:36:21 +0100 Subject: s390/vmem: remove redundant check Reviewed-by: David Hildenbrand Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index b239f2ba93b0..56ab9bb770f3 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -223,7 +223,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, if (!add) { if (pmd_none(*pmd)) continue; - if (pmd_large(*pmd) && !add) { + if (pmd_large(*pmd)) { if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE)) { if (!direct) -- cgit v1.2.3-70-g09d2 From 12bb4c682354740e4aaf0103a9bf283df42adaa9 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 10 Nov 2020 10:36:23 +0100 Subject: s390/vmem: make variable and function names consistent Rename some variable and functions to better clarify what they are and what they do. Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 56ab9bb770f3..01f3a5f58e64 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -76,20 +76,20 @@ static void vmem_pte_free(unsigned long *table) /* * The unused vmemmap range, which was not yet memset(PAGE_UNUSED) ranges - * from unused_pmd_start to next PMD_SIZE boundary. + * from unused_sub_pmd_start to next PMD_SIZE boundary. */ -static unsigned long unused_pmd_start; +static unsigned long unused_sub_pmd_start; -static void vmemmap_flush_unused_pmd(void) +static void vmemmap_flush_unused_sub_pmd(void) { - if (!unused_pmd_start) + if (!unused_sub_pmd_start) return; - memset(__va(unused_pmd_start), PAGE_UNUSED, - ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start); - unused_pmd_start = 0; + memset(__va(unused_sub_pmd_start), PAGE_UNUSED, + ALIGN(unused_sub_pmd_start, PMD_SIZE) - unused_sub_pmd_start); + unused_sub_pmd_start = 0; } -static void __vmemmap_use_sub_pmd(unsigned long start, unsigned long end) +static void vmemmap_mark_sub_pmd_used(unsigned long start, unsigned long end) { /* * As we expect to add in the same granularity as we remove, it's @@ -106,24 +106,24 @@ static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end) * We only optimize if the new used range directly follows the * previously unused range (esp., when populating consecutive sections). */ - if (unused_pmd_start == start) { - unused_pmd_start = end; - if (likely(IS_ALIGNED(unused_pmd_start, PMD_SIZE))) - unused_pmd_start = 0; + if (unused_sub_pmd_start == start) { + unused_sub_pmd_start = end; + if (likely(IS_ALIGNED(unused_sub_pmd_start, PMD_SIZE))) + unused_sub_pmd_start = 0; return; } - vmemmap_flush_unused_pmd(); - __vmemmap_use_sub_pmd(start, end); + vmemmap_flush_unused_sub_pmd(); + vmemmap_mark_sub_pmd_used(start, end); } static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) { void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); - vmemmap_flush_unused_pmd(); + vmemmap_flush_unused_sub_pmd(); /* Could be our memmap page is filled with PAGE_UNUSED already ... */ - __vmemmap_use_sub_pmd(start, end); + vmemmap_mark_sub_pmd_used(start, end); /* Mark the unused parts of the new memmap page PAGE_UNUSED. */ if (!IS_ALIGNED(start, PMD_SIZE)) @@ -134,7 +134,7 @@ static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) * unused range in the populated PMD. */ if (!IS_ALIGNED(end, PMD_SIZE)) - unused_pmd_start = end; + unused_sub_pmd_start = end; } /* Returns true if the PMD is completely unused and can be freed. */ @@ -142,7 +142,7 @@ static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end) { void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); - vmemmap_flush_unused_pmd(); + vmemmap_flush_unused_sub_pmd(); memset(__va(start), PAGE_UNUSED, end - start); return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE); } -- cgit v1.2.3-70-g09d2 From 87d5986345219a7e4f204726d9085ea87f3e22d0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 16 Nov 2020 08:06:40 +0100 Subject: s390/mm: remove set_fs / rework address space handling Remove set_fs support from s390. With doing this rework address space handling and simplify it. As a result address spaces are now setup like this: CPU running in | %cr1 ASCE | %cr7 ASCE | %cr13 ASCE ----------------------------|-----------|-----------|----------- user space | user | user | kernel kernel, normal execution | kernel | user | kernel kernel, kvm guest execution | gmap | user | kernel To achieve this the getcpu vdso syscall is removed in order to avoid secondary address mode and a separate vdso address space in for user space. The getcpu vdso syscall will be implemented differently with a subsequent patch. The kernel accesses user space always via secondary address space. This happens in different ways: - with mvcos in home space mode and directly read/write to secondary address space - with mvcs/mvcp in primary space mode and copy from primary space to secondary space or vice versa - with e.g. cs in secondary space mode and access secondary space Switching translation modes happens with sacf before and after instructions which access user space, like before. Lazy handling of control register reloading is removed in the hope to make everything simpler, but at the cost of making kernel entry and exit a bit slower. That is: on kernel entry the primary asce is always changed to contain the kernel asce, and on kernel exit the primary asce is changed again so it contains the user asce. In kernel mode there is only one exception to the primary asce: when kvm guests are executed the primary asce contains the gmap asce (which describes the guest address space). The primary asce is reset to kernel asce whenever kvm guest execution is interrupted, so that this doesn't has to be taken into account for any user space accesses. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 1 - arch/s390/include/asm/futex.h | 6 --- arch/s390/include/asm/lowcore.h | 4 +- arch/s390/include/asm/mmu_context.h | 25 ++-------- arch/s390/include/asm/processor.h | 7 --- arch/s390/include/asm/ptrace.h | 1 + arch/s390/include/asm/uaccess.h | 22 +-------- arch/s390/include/asm/vdso.h | 25 ---------- arch/s390/kernel/asm-offsets.c | 7 +-- arch/s390/kernel/entry.S | 75 ++++++++++-------------------- arch/s390/kernel/entry.h | 1 - arch/s390/kernel/process.c | 14 ------ arch/s390/kernel/smp.c | 10 +--- arch/s390/kernel/vdso.c | 57 ----------------------- arch/s390/kernel/vdso64/Makefile | 2 +- arch/s390/kernel/vdso64/getcpu.S | 31 ------------- arch/s390/kernel/vdso64/vdso64.lds.S | 1 - arch/s390/lib/uaccess.c | 89 ++---------------------------------- arch/s390/mm/fault.c | 29 +++--------- arch/s390/mm/pgalloc.c | 13 +----- arch/s390/pci/pci_mmio.c | 6 --- 21 files changed, 51 insertions(+), 375 deletions(-) delete mode 100644 arch/s390/kernel/vdso64/getcpu.S (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 28bdb11e9db2..64c72b1ecc77 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -190,7 +190,6 @@ config S390 select PCI_DOMAINS if PCI select PCI_MSI if PCI select PCI_MSI_ARCH_FALLBACKS if PCI_MSI - select SET_FS select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index 26f9144562c9..c22debfcebf1 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -26,9 +26,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) { int oldval = 0, newval, ret; - mm_segment_t old_fs; - old_fs = enable_sacf_uaccess(); switch (op) { case FUTEX_OP_SET: __futex_atomic_op("lr %2,%5\n", @@ -53,7 +51,6 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, default: ret = -ENOSYS; } - disable_sacf_uaccess(old_fs); if (!ret) *oval = oldval; @@ -64,10 +61,8 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { - mm_segment_t old_fs; int ret; - old_fs = enable_sacf_uaccess(); asm volatile( " sacf 256\n" "0: cs %1,%4,0(%5)\n" @@ -77,7 +72,6 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "=d" (ret), "+d" (oldval), "=m" (*uaddr) : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) : "cc", "memory"); - disable_sacf_uaccess(old_fs); *uval = oldval; return ret; } diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 612ed3c6d581..69ce9191eaf1 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -116,7 +116,7 @@ struct lowcore { /* Address space pointer. */ __u64 kernel_asce; /* 0x0380 */ __u64 user_asce; /* 0x0388 */ - __u64 vdso_asce; /* 0x0390 */ + __u8 pad_0x0390[0x0398-0x0390]; /* 0x0390 */ /* * The lpp and current_pid fields form a @@ -134,7 +134,7 @@ struct lowcore { __u32 spinlock_index; /* 0x03b0 */ __u32 fpu_flags; /* 0x03b4 */ __u64 percpu_offset; /* 0x03b8 */ - __u64 vdso_per_cpu_data; /* 0x03c0 */ + __u8 pad_0x03c0[0x03c8-0x03c0]; /* 0x03c0 */ __u64 machine_flags; /* 0x03c8 */ __u64 gmap; /* 0x03d0 */ __u8 pad_0x03d8[0x0400-0x03d8]; /* 0x03d8 */ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index cec19ae164eb..51def960a3dd 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -71,16 +71,6 @@ static inline int init_new_context(struct task_struct *tsk, #define destroy_context(mm) do { } while (0) -static inline void set_user_asce(struct mm_struct *mm) -{ - S390_lowcore.user_asce = mm->context.asce; - __ctl_load(S390_lowcore.user_asce, 1, 1); - clear_cpu_flag(CIF_ASCE_PRIMARY); -} - -mm_segment_t enable_sacf_uaccess(void); -void disable_sacf_uaccess(mm_segment_t old_fs); - static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { @@ -88,15 +78,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, S390_lowcore.user_asce = next->context.asce; cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); - /* Clear previous user-ASCE from CR1 and CR7 */ - if (!test_cpu_flag(CIF_ASCE_PRIMARY)) { - __ctl_load(S390_lowcore.kernel_asce, 1, 1); - set_cpu_flag(CIF_ASCE_PRIMARY); - } - if (test_cpu_flag(CIF_ASCE_SECONDARY)) { - __ctl_load(S390_lowcore.vdso_asce, 7, 7); - clear_cpu_flag(CIF_ASCE_SECONDARY); - } + /* Clear previous user-ASCE from CR7 */ + __ctl_load(S390_lowcore.kernel_asce, 7, 7); if (prev != next) cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); } @@ -115,7 +98,7 @@ static inline void finish_arch_post_lock_switch(void) __tlb_flush_mm_lazy(mm); preempt_enable(); } - set_fs(current->thread.mm_segment); + __ctl_load(S390_lowcore.user_asce, 7, 7); } #define enter_lazy_tlb(mm,tsk) do { } while (0) @@ -126,7 +109,7 @@ static inline void activate_mm(struct mm_struct *prev, { switch_mm(prev, next, current); cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); - set_user_asce(next); + __ctl_load(S390_lowcore.user_asce, 7, 7); } #endif /* __S390_MMU_CONTEXT_H */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 2043c562ec55..6b7269f51f83 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -14,8 +14,6 @@ #include -#define CIF_ASCE_PRIMARY 0 /* primary asce needs fixup / uaccess */ -#define CIF_ASCE_SECONDARY 1 /* secondary asce needs fixup / uaccess */ #define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ #define CIF_FPU 3 /* restore FPU registers */ #define CIF_IGNORE_IRQ 4 /* ignore interrupt (for udelay) */ @@ -23,8 +21,6 @@ #define CIF_MCCK_GUEST 6 /* machine check happening in guest */ #define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */ -#define _CIF_ASCE_PRIMARY BIT(CIF_ASCE_PRIMARY) -#define _CIF_ASCE_SECONDARY BIT(CIF_ASCE_SECONDARY) #define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY) #define _CIF_FPU BIT(CIF_FPU) #define _CIF_IGNORE_IRQ BIT(CIF_IGNORE_IRQ) @@ -102,8 +98,6 @@ extern void __bpon(void); #define HAVE_ARCH_PICK_MMAP_LAYOUT -typedef unsigned int mm_segment_t; - /* * Thread structure */ @@ -116,7 +110,6 @@ struct thread_struct { unsigned long hardirq_timer; /* task cputime in hardirq context */ unsigned long softirq_timer; /* task cputime in softirq context */ unsigned long sys_call_table; /* system call table address */ - mm_segment_t mm_segment; unsigned long gmap_addr; /* address of last gmap fault. */ unsigned int gmap_write_flag; /* gmap fault write indication */ unsigned int gmap_int_code; /* int code of last gmap fault */ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 16b3e4396312..73ca7f7cac33 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -87,6 +87,7 @@ struct pt_regs unsigned int int_parm; unsigned long int_parm_long; unsigned long flags; + unsigned long cr1; }; /* diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index c868e7ee49b3..e59fd96a1561 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -18,24 +18,6 @@ #include #include -/* - * The fs value determines whether argument validity checking should be - * performed or not. If get_fs() == USER_DS, checking is performed, with - * get_fs() == KERNEL_DS, checking is bypassed. - * - * For historical reasons, these macros are grossly misnamed. - */ - -#define KERNEL_DS (0) -#define KERNEL_DS_SACF (1) -#define USER_DS (2) -#define USER_DS_SACF (3) - -#define get_fs() (current->thread.mm_segment) -#define uaccess_kernel() ((get_fs() & 2) == KERNEL_DS) - -void set_fs(mm_segment_t fs); - static inline int __range_ok(unsigned long addr, unsigned long size) { return 1; @@ -88,7 +70,7 @@ int __get_user_bad(void) __attribute__((noreturn)); static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { - unsigned long spec = 0x010000UL; + unsigned long spec = 0x810000UL; int rc; switch (size) { @@ -121,7 +103,7 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) { - unsigned long spec = 0x01UL; + unsigned long spec = 0x81UL; int rc; switch (size) { diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 29b44a930e71..9b299c05abf1 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -12,32 +12,7 @@ #ifndef __ASSEMBLY__ -/* - * Note about the vdso_data and vdso_per_cpu_data structures: - * - * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the - * structure is supposed to be known only to the function in the vdso - * itself and may change without notice. - */ - -struct vdso_per_cpu_data { - /* - * Note: node_id and cpu_nr must be at adjacent memory locations. - * VDSO userspace must read both values with a single instruction. - */ - union { - __u64 getcpu_val; - struct { - __u32 node_id; - __u32 cpu_nr; - }; - }; -}; - extern struct vdso_data *vdso_data; -int vdso_alloc_per_cpu(struct lowcore *lowcore); -void vdso_free_per_cpu(struct lowcore *lowcore); - #endif /* __ASSEMBLY__ */ #endif /* __S390_VDSO_H__ */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 483051e10db3..79724d861dc9 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -48,6 +47,7 @@ int main(void) OFFSET(__PT_INT_PARM, pt_regs, int_parm); OFFSET(__PT_INT_PARM_LONG, pt_regs, int_parm_long); OFFSET(__PT_FLAGS, pt_regs, flags); + OFFSET(__PT_CR1, pt_regs, cr1); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); /* stack_frame offsets */ @@ -59,8 +59,6 @@ int main(void) OFFSET(__SF_SIE_REASON, stack_frame, empty1[3]); OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[4]); BLANK(); - OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val); - BLANK(); /* idle data offsets */ OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter); OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit); @@ -138,12 +136,11 @@ int main(void) OFFSET(__LC_RESTART_FN, lowcore, restart_fn); OFFSET(__LC_RESTART_DATA, lowcore, restart_data); OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source); + OFFSET(__LC_KERNEL_ASCE, lowcore, kernel_asce); OFFSET(__LC_USER_ASCE, lowcore, user_asce); - OFFSET(__LC_VDSO_ASCE, lowcore, vdso_asce); OFFSET(__LC_LPP, lowcore, lpp); OFFSET(__LC_CURRENT_PID, lowcore, current_pid); OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset); - OFFSET(__LC_VDSO_PER_CPU, lowcore, vdso_per_cpu_data); OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags); OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count); OFFSET(__LC_GMAP, lowcore, gmap); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index b454654ce5b5..d43ef46dc683 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -55,7 +55,7 @@ _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) -_CIF_WORK = (_CIF_ASCE_PRIMARY | _CIF_ASCE_SECONDARY | _CIF_FPU) +_CIF_WORK = (_CIF_FPU) _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) _LPP_OFFSET = __LC_LPP @@ -327,7 +327,7 @@ ENTRY(sie64a) BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_skip: ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce .Lsie_done: # some program checks are suppressing. C code (e.g. do_protection_exception) # will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There @@ -380,6 +380,7 @@ ENTRY(system_call) lg %r12,__LC_CURRENT lghi %r14,_PIF_SYSCALL .Lsysc_per: + lctlg %c1,%c1,__LC_KERNEL_ASCE lghi %r13,__TASK_thread lg %r15,__LC_KERNEL_STACK la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs @@ -427,8 +428,7 @@ ENTRY(system_call) jnz .Lsysc_work TSTMSK __TI_flags(%r12),_TIF_WORK jnz .Lsysc_work # check for work - TSTMSK __LC_CPU_FLAGS,(_CIF_WORK-_CIF_FPU) - jnz .Lsysc_work + lctlg %c1,%c1,__LC_USER_ASCE BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP TSTMSK __LC_CPU_FLAGS, _CIF_FPU jz .Lsysc_skip_fpu @@ -467,8 +467,6 @@ ENTRY(system_call) jo .Lsysc_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME jo .Lsysc_notify_resume - TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) - jnz .Lsysc_asce j .Lsysc_return # @@ -478,26 +476,6 @@ ENTRY(system_call) larl %r14,.Lsysc_return jg schedule -# -# _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce -# -.Lsysc_asce: - ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_SECONDARY - lctlg %c7,%c7,__LC_VDSO_ASCE # load secondary asce - TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY - jz .Lsysc_return -#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES - tm __LC_STFLE_FAC_LIST+3,0x10 # has MVCOS ? - jnz .Lsysc_set_fs_fixup - ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce - j .Lsysc_return -.Lsysc_set_fs_fixup: -#endif - larl %r14,.Lsysc_return - jg set_fs_fixup - - # # _TIF_SIGPENDING is set, call do_signal # @@ -634,8 +612,11 @@ ENTRY(pgm_check_handler) 0: lg %r12,__LC_CURRENT lghi %r11,0 lmg %r8,%r9,__LC_PGM_OLD_PSW - tmhh %r8,0x0001 # test problem state bit - jnz 3f # -> fault in user space + tmhh %r8,0x0001 # coming from user space? + jno .Lpgm_skip_asce + lctlg %c1,%c1,__LC_KERNEL_ASCE + j 3f +.Lpgm_skip_asce: #if IS_ENABLED(CONFIG_KVM) # cleanup critical section for program checks in sie64a lgr %r14,%r9 @@ -646,7 +627,7 @@ ENTRY(pgm_check_handler) jhe 1f lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit lghi %r11,_PIF_GUEST_FAULT #endif @@ -767,6 +748,10 @@ ENTRY(io_int_handler) xgr %r10,%r10 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) + tm __PT_PSW+1(%r11),0x01 # coming from user space? + jno .Lio_skip_asce + lctlg %c1,%c1,__LC_KERNEL_ASCE +.Lio_skip_asce: mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ @@ -808,6 +793,7 @@ ENTRY(io_int_handler) mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) tm __PT_PSW+1(%r11),0x01 # returning to user ? jno .Lio_exit_kernel + lctlg %c1,%c1,__LC_USER_ASCE BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP stpt __LC_EXIT_TIMER .Lio_exit_kernel: @@ -873,29 +859,8 @@ ENTRY(io_int_handler) jo .Lio_guarded_storage TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lio_vxrs - TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) - jnz .Lio_asce j .Lio_return -# -# _CIF_ASCE_PRIMARY and/or CIF_ASCE_SECONDARY set, load user space asce -# -.Lio_asce: - ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_SECONDARY - lctlg %c7,%c7,__LC_VDSO_ASCE # load secondary asce - TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY - jz .Lio_return -#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES - tm __LC_STFLE_FAC_LIST+3,0x10 # has MVCOS ? - jnz .Lio_set_fs_fixup - ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce - j .Lio_return -.Lio_set_fs_fixup: -#endif - larl %r14,.Lio_return - jg set_fs_fixup - # # CIF_FPU is set, restore floating-point controls and floating-point registers. # @@ -977,6 +942,10 @@ ENTRY(ext_int_handler) xgr %r10,%r10 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) + tm __PT_PSW+1(%r11),0x01 # coming from user space? + jno .Lext_skip_asce + lctlg %c1,%c1,__LC_KERNEL_ASCE +.Lext_skip_asce: lghi %r1,__LC_EXT_PARAMS2 mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS @@ -1206,6 +1175,9 @@ ENTRY(mcck_int_handler) xgr %r10,%r10 mvc __PT_R8(64,%r11),0(%r14) stmg %r8,%r9,__PT_PSW(%r11) + la %r14,4095 + mvc __PT_CR1(8,%r11),__LC_CREGS_SAVE_AREA-4095+8(%r14) + lctlg %c1,%c1,__LC_KERNEL_ASCE xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs @@ -1221,6 +1193,7 @@ ENTRY(mcck_int_handler) brasl %r14,s390_handle_mcck TRACE_IRQS_ON .Lmcck_return: + lctlg %c1,%c1,__PT_CR1(%r11) lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? @@ -1297,7 +1270,7 @@ ENDPROC(stack_overflow) 1: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + lctlg %c1,%c1,__LC_KERNEL_ASCE larl %r9,sie_exit # skip forward to sie_exit BR_EX %r14,%r11 diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index d2ca3fe51f8e..a16c33b32ab0 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -83,7 +83,6 @@ long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user DECLARE_PER_CPU(u64, mt_cycles[8]); void gs_load_bc_cb(struct pt_regs *regs); -void set_fs_fixup(void); unsigned long stack_alloc(void); void stack_free(unsigned long stack); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index ec801d3bbb37..bc3ca54edfb4 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -94,7 +94,6 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, /* Save access registers to new thread structure. */ save_access_regs(&p->thread.acrs[0]); /* start new process with ar4 pointing to the correct address space */ - p->thread.mm_segment = get_fs(); /* Don't copy debug registers */ memset(&p->thread.per_user, 0, sizeof(p->thread.per_user)); memset(&p->thread.per_event, 0, sizeof(p->thread.per_event)); @@ -208,16 +207,3 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) ret = PAGE_ALIGN(mm->brk + brk_rnd()); return (ret > mm->brk) ? ret : mm->brk; } - -void set_fs_fixup(void) -{ - struct pt_regs *regs = current_pt_regs(); - static bool warned; - - set_fs(USER_DS); - if (warned) - return; - WARN(1, "Unbalanced set_fs - int code: 0x%x\n", regs->int_code); - show_registers(regs); - warned = true; -} diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 390d97daa2b3..cac96f240dd8 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -47,7 +47,6 @@ #include #include #include -#include #include #include #include @@ -217,14 +216,10 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); if (nmi_alloc_per_cpu(lc)) goto out_async; - if (vdso_alloc_per_cpu(lc)) - goto out_mcesa; lowcore_ptr[cpu] = lc; pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc); return 0; -out_mcesa: - nmi_free_per_cpu(lc); out_async: stack_free(async_stack); out: @@ -245,7 +240,6 @@ static void pcpu_free_lowcore(struct pcpu *pcpu) pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); lowcore_ptr[pcpu - pcpu_devices] = NULL; - vdso_free_per_cpu(pcpu->lowcore); nmi_free_per_cpu(pcpu->lowcore); stack_free(async_stack); if (pcpu == &pcpu_devices[0]) @@ -271,7 +265,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->steal_timer = lc->avg_steal_timer = 0; __ctl_store(lc->cregs_save_area, 0, 15); lc->cregs_save_area[1] = lc->kernel_asce; - lc->cregs_save_area[7] = lc->vdso_asce; + lc->cregs_save_area[7] = lc->user_asce; save_access_regs((unsigned int *) lc->access_regs_save_area); memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, sizeof(lc->stfle_fac_list)); @@ -859,8 +853,6 @@ static void smp_init_secondary(void) S390_lowcore.last_update_clock = get_tod_clock(); restore_access_regs(S390_lowcore.access_regs_save_area); - set_cpu_flag(CIF_ASCE_PRIMARY); - set_cpu_flag(CIF_ASCE_SECONDARY); cpu_init(); rcu_cpu_starting(cpu); preempt_disable(); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index f9da5b149141..53c983f1ea51 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -99,61 +99,6 @@ static union { u8 page[PAGE_SIZE]; } vdso_data_store __page_aligned_data; struct vdso_data *vdso_data = (struct vdso_data *)&vdso_data_store.data; -/* - * Allocate/free per cpu vdso data. - */ -#define SEGMENT_ORDER 2 - -int vdso_alloc_per_cpu(struct lowcore *lowcore) -{ - unsigned long segment_table, page_table, page_frame; - struct vdso_per_cpu_data *vd; - - segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); - page_table = get_zeroed_page(GFP_KERNEL); - page_frame = get_zeroed_page(GFP_KERNEL); - if (!segment_table || !page_table || !page_frame) - goto out; - arch_set_page_dat(virt_to_page(segment_table), SEGMENT_ORDER); - arch_set_page_dat(virt_to_page(page_table), 0); - - /* Initialize per-cpu vdso data page */ - vd = (struct vdso_per_cpu_data *) page_frame; - vd->cpu_nr = lowcore->cpu_nr; - vd->node_id = cpu_to_node(vd->cpu_nr); - - /* Set up page table for the vdso address space */ - memset64((u64 *)segment_table, _SEGMENT_ENTRY_EMPTY, _CRST_ENTRIES); - memset64((u64 *)page_table, _PAGE_INVALID, PTRS_PER_PTE); - - *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; - *(unsigned long *) page_table = _PAGE_PROTECT + page_frame; - - lowcore->vdso_asce = segment_table + - _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT; - lowcore->vdso_per_cpu_data = page_frame; - - return 0; - -out: - free_page(page_frame); - free_page(page_table); - free_pages(segment_table, SEGMENT_ORDER); - return -ENOMEM; -} - -void vdso_free_per_cpu(struct lowcore *lowcore) -{ - unsigned long segment_table, page_table, page_frame; - - segment_table = lowcore->vdso_asce & PAGE_MASK; - page_table = *(unsigned long *) segment_table; - page_frame = *(unsigned long *) page_table; - - free_page(page_frame); - free_page(page_table); - free_pages(segment_table, SEGMENT_ORDER); -} /* * This is called from binfmt_elf, we create the special vma for the @@ -240,8 +185,6 @@ static int __init vdso_init(void) } vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); vdso64_pagelist[vdso64_pages] = NULL; - if (vdso_alloc_per_cpu(&S390_lowcore)) - BUG(); get_page(virt_to_page(vdso_data)); diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 13cc5a3f9abf..b0cf58ae82fe 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -6,7 +6,7 @@ ARCH_REL_TYPE_ABS := R_390_COPY|R_390_GLOB_DAT|R_390_JMP_SLOT|R_390_RELATIVE ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT include $(srctree)/lib/vdso/Makefile -obj-vdso64 = vdso_user_wrapper.o note.o getcpu.o +obj-vdso64 = vdso_user_wrapper.o note.o obj-cvdso64 = vdso64_generic.o CFLAGS_REMOVE_vdso64_generic.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) diff --git a/arch/s390/kernel/vdso64/getcpu.S b/arch/s390/kernel/vdso64/getcpu.S deleted file mode 100644 index 3c04f7328500..000000000000 --- a/arch/s390/kernel/vdso64/getcpu.S +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Userland implementation of getcpu() for 64 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2016 - * Author(s): Martin Schwidefsky - */ -#include -#include -#include - - .text - .align 4 - .globl __kernel_getcpu - .type __kernel_getcpu,@function -__kernel_getcpu: - CFI_STARTPROC - sacf 256 - lm %r4,%r5,__VDSO_GETCPU_VAL(%r0) - sacf 0 - ltgr %r2,%r2 - jz 2f - st %r5,0(%r2) -2: ltgr %r3,%r3 - jz 3f - st %r4,0(%r3) -3: lghi %r2,0 - br %r14 - CFI_ENDPROC - .size __kernel_getcpu,.-__kernel_getcpu diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index 7ddb116b5e2e..b59006584a9d 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -135,7 +135,6 @@ VERSION __kernel_gettimeofday; __kernel_clock_gettime; __kernel_clock_getres; - __kernel_getcpu; local: *; }; diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 0267405ab7c6..0ffbe1fad72a 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -40,71 +40,10 @@ static inline int copy_with_mvcos(void) } #endif -void set_fs(mm_segment_t fs) -{ - current->thread.mm_segment = fs; - if (fs == USER_DS) { - __ctl_load(S390_lowcore.user_asce, 1, 1); - clear_cpu_flag(CIF_ASCE_PRIMARY); - } else { - __ctl_load(S390_lowcore.kernel_asce, 1, 1); - set_cpu_flag(CIF_ASCE_PRIMARY); - } - if (fs & 1) { - if (fs == USER_DS_SACF) - __ctl_load(S390_lowcore.user_asce, 7, 7); - else - __ctl_load(S390_lowcore.kernel_asce, 7, 7); - set_cpu_flag(CIF_ASCE_SECONDARY); - } -} -EXPORT_SYMBOL(set_fs); - -mm_segment_t enable_sacf_uaccess(void) -{ - mm_segment_t old_fs; - unsigned long asce, cr; - unsigned long flags; - - old_fs = current->thread.mm_segment; - if (old_fs & 1) - return old_fs; - /* protect against a concurrent page table upgrade */ - local_irq_save(flags); - current->thread.mm_segment |= 1; - asce = S390_lowcore.kernel_asce; - if (likely(old_fs == USER_DS)) { - __ctl_store(cr, 1, 1); - if (cr != S390_lowcore.kernel_asce) { - __ctl_load(S390_lowcore.kernel_asce, 1, 1); - set_cpu_flag(CIF_ASCE_PRIMARY); - } - asce = S390_lowcore.user_asce; - } - __ctl_store(cr, 7, 7); - if (cr != asce) { - __ctl_load(asce, 7, 7); - set_cpu_flag(CIF_ASCE_SECONDARY); - } - local_irq_restore(flags); - return old_fs; -} -EXPORT_SYMBOL(enable_sacf_uaccess); - -void disable_sacf_uaccess(mm_segment_t old_fs) -{ - current->thread.mm_segment = old_fs; - if (old_fs == USER_DS && test_facility(27)) { - __ctl_load(S390_lowcore.user_asce, 1, 1); - clear_cpu_flag(CIF_ASCE_PRIMARY); - } -} -EXPORT_SYMBOL(disable_sacf_uaccess); - static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, unsigned long size) { - register unsigned long reg0 asm("0") = 0x01UL; + register unsigned long reg0 asm("0") = 0x81UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -135,9 +74,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, unsigned long size) { unsigned long tmp1, tmp2; - mm_segment_t old_fs; - old_fs = enable_sacf_uaccess(); tmp1 = -256UL; asm volatile( " sacf 0\n" @@ -164,7 +101,6 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) : : "cc", "memory"); - disable_sacf_uaccess(old_fs); return size; } @@ -179,7 +115,7 @@ EXPORT_SYMBOL(raw_copy_from_user); static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, unsigned long size) { - register unsigned long reg0 asm("0") = 0x010000UL; + register unsigned long reg0 asm("0") = 0x810000UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -210,9 +146,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, unsigned long size) { unsigned long tmp1, tmp2; - mm_segment_t old_fs; - old_fs = enable_sacf_uaccess(); tmp1 = -256UL; asm volatile( " sacf 0\n" @@ -239,7 +173,6 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) : : "cc", "memory"); - disable_sacf_uaccess(old_fs); return size; } @@ -254,7 +187,7 @@ EXPORT_SYMBOL(raw_copy_to_user); static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from, unsigned long size) { - register unsigned long reg0 asm("0") = 0x010001UL; + register unsigned long reg0 asm("0") = 0x810081UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -277,10 +210,8 @@ static inline unsigned long copy_in_user_mvcos(void __user *to, const void __use static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from, unsigned long size) { - mm_segment_t old_fs; unsigned long tmp1; - old_fs = enable_sacf_uaccess(); asm volatile( " sacf 256\n" " aghi %0,-1\n" @@ -304,7 +235,6 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1) : : "cc", "memory"); - disable_sacf_uaccess(old_fs); return size; } @@ -318,7 +248,7 @@ EXPORT_SYMBOL(raw_copy_in_user); static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) { - register unsigned long reg0 asm("0") = 0x010000UL; + register unsigned long reg0 asm("0") = 0x810000UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -346,10 +276,8 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size static inline unsigned long clear_user_xc(void __user *to, unsigned long size) { - mm_segment_t old_fs; unsigned long tmp1, tmp2; - old_fs = enable_sacf_uaccess(); asm volatile( " sacf 256\n" " aghi %0,-1\n" @@ -378,7 +306,6 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size) EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2) : : "cc", "memory"); - disable_sacf_uaccess(old_fs); return size; } @@ -414,15 +341,9 @@ static inline unsigned long strnlen_user_srst(const char __user *src, unsigned long __strnlen_user(const char __user *src, unsigned long size) { - mm_segment_t old_fs; - unsigned long len; - if (unlikely(!size)) return 0; - old_fs = enable_sacf_uaccess(); - len = strnlen_user_srst(src, size); - disable_sacf_uaccess(old_fs); - return len; + return strnlen_user_srst(src, size); } EXPORT_SYMBOL(__strnlen_user); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 996884dcc9fd..b8210103de14 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -53,7 +53,6 @@ enum fault_type { KERNEL_FAULT, USER_FAULT, - VDSO_FAULT, GMAP_FAULT, }; @@ -77,22 +76,16 @@ static enum fault_type get_fault_type(struct pt_regs *regs) trans_exc_code = regs->int_parm_long & 3; if (likely(trans_exc_code == 0)) { /* primary space exception */ - if (IS_ENABLED(CONFIG_PGSTE) && - test_pt_regs_flag(regs, PIF_GUEST_FAULT)) - return GMAP_FAULT; - if (current->thread.mm_segment == USER_DS) + if (user_mode(regs)) return USER_FAULT; - return KERNEL_FAULT; - } - if (trans_exc_code == 2) { - /* secondary space exception */ - if (current->thread.mm_segment & 1) { - if (current->thread.mm_segment == USER_DS_SACF) - return USER_FAULT; + if (!IS_ENABLED(CONFIG_PGSTE)) return KERNEL_FAULT; - } - return VDSO_FAULT; + if (test_pt_regs_flag(regs, PIF_GUEST_FAULT)) + return GMAP_FAULT; + return KERNEL_FAULT; } + if (trans_exc_code == 2) + return USER_FAULT; if (trans_exc_code == 1) { /* access register mode, not used in the kernel */ return USER_FAULT; @@ -188,10 +181,6 @@ static void dump_fault_info(struct pt_regs *regs) asce = S390_lowcore.user_asce; pr_cont("user "); break; - case VDSO_FAULT: - asce = S390_lowcore.vdso_asce; - pr_cont("vdso "); - break; case GMAP_FAULT: asce = ((struct gmap *) S390_lowcore.gmap)->asce; pr_cont("gmap "); @@ -414,9 +403,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) switch (type) { case KERNEL_FAULT: goto out; - case VDSO_FAULT: - fault = VM_FAULT_BADMAP; - goto out; case USER_FAULT: case GMAP_FAULT: if (faulthandler_disabled() || !mm) @@ -834,7 +820,6 @@ void do_secure_storage_access(struct pt_regs *regs) if (rc) BUG(); break; - case VDSO_FAULT: case GMAP_FAULT: default: do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 11d2c8395e2a..4e87c819ddea 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -70,19 +70,10 @@ static void __crst_table_upgrade(void *arg) { struct mm_struct *mm = arg; - /* we must change all active ASCEs to avoid the creation of new TLBs */ + /* change all active ASCEs to avoid the creation of new TLBs */ if (current->active_mm == mm) { S390_lowcore.user_asce = mm->context.asce; - if (current->thread.mm_segment == USER_DS) { - __ctl_load(S390_lowcore.user_asce, 1, 1); - /* Mark user-ASCE present in CR1 */ - clear_cpu_flag(CIF_ASCE_PRIMARY); - } - if (current->thread.mm_segment == USER_DS_SACF) { - __ctl_load(S390_lowcore.user_asce, 7, 7); - /* enable_sacf_uaccess does all or nothing */ - WARN_ON(!test_cpu_flag(CIF_ASCE_SECONDARY)); - } + __ctl_load(S390_lowcore.user_asce, 7, 7); } __tlb_flush_local(); } diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 1a6adbc68ee8..de3bdbed8881 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -93,12 +93,10 @@ static inline int __memcpy_toio_inuser(void __iomem *dst, { int size, rc = 0; u8 status = 0; - mm_segment_t old_fs; if (!src) return -EINVAL; - old_fs = enable_sacf_uaccess(); while (n > 0) { size = zpci_get_max_write_size((u64 __force) dst, (u64 __force) src, n, @@ -113,7 +111,6 @@ static inline int __memcpy_toio_inuser(void __iomem *dst, dst += size; n -= size; } - disable_sacf_uaccess(old_fs); if (rc) zpci_err_mmio(rc, status, (__force u64) dst); return rc; @@ -246,9 +243,7 @@ static inline int __memcpy_fromio_inuser(void __user *dst, { int size, rc = 0; u8 status; - mm_segment_t old_fs; - old_fs = enable_sacf_uaccess(); while (n > 0) { size = zpci_get_max_write_size((u64 __force) src, (u64 __force) dst, n, @@ -260,7 +255,6 @@ static inline int __memcpy_fromio_inuser(void __user *dst, dst += size; n -= size; } - disable_sacf_uaccess(old_fs); if (rc) zpci_err_mmio(rc, status, (__force u64) dst); return rc; -- cgit v1.2.3-70-g09d2 From 0290c9e328e04052e317171953feb18177a34aed Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 16 Nov 2020 08:06:41 +0100 Subject: s390/mm: use invalid asce instead of kernel asce Create a region 3 page table which contains only invalid entries, and use that via "s390_invalid_asce" instead of the kernel ASCE whenever there is either - no user address space available, e.g. during early startup - as an intermediate ASCE when address spaces are switched This makes sure that user space accesses in such situations are guaranteed to fail. Reviewed-by: Sven Schnelle Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/include/asm/mmu_context.h | 2 +- arch/s390/include/asm/pgtable.h | 1 + arch/s390/kernel/smp.c | 2 +- arch/s390/kernel/vmlinux.lds.S | 3 ++- arch/s390/mm/init.c | 10 ++++++++-- 5 files changed, 13 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 51def960a3dd..87a84fc59fc3 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -79,7 +79,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, S390_lowcore.user_asce = next->context.asce; cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); /* Clear previous user-ASCE from CR7 */ - __ctl_load(S390_lowcore.kernel_asce, 7, 7); + __ctl_load(s390_invalid_asce, 7, 7); if (prev != next) cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index a8edd96b2103..794746a32806 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -23,6 +23,7 @@ extern pgd_t swapper_pg_dir[]; extern void paging_init(void); +extern unsigned long s390_invalid_asce; enum { PG_DIRECT_MAP_4K = 0, diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index cac96f240dd8..7f7d81f19292 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -259,7 +259,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->spinlock_index = 0; lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = S390_lowcore.kernel_asce; - lc->user_asce = S390_lowcore.kernel_asce; + lc->user_asce = s390_invalid_asce; lc->machine_flags = S390_lowcore.machine_flags; lc->user_timer = lc->system_timer = lc->steal_timer = lc->avg_steal_timer = 0; diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 177ccfbda40a..4c0e19145cc6 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -10,7 +10,8 @@ * Put .bss..swapper_pg_dir as the first thing in .bss. This will * make sure it has 16k alignment. */ -#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) +#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) \ + *(.bss..invalid_pg_dir) /* Handle ro_after_init data on our own. */ #define RO_AFTER_INIT_DATA diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 69e6e2a5072e..73a163065b95 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -49,6 +49,9 @@ #include pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir"); +static pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir"); + +unsigned long s390_invalid_asce; unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); @@ -92,6 +95,9 @@ void __init paging_init(void) unsigned long pgd_type, asce_bits; psw_t psw; + s390_invalid_asce = (unsigned long)invalid_pg_dir; + s390_invalid_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; + crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY); init_mm.pgd = swapper_pg_dir; if (VMALLOC_END > _REGION2_SIZE) { asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; @@ -102,14 +108,14 @@ void __init paging_init(void) } init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; S390_lowcore.kernel_asce = init_mm.context.asce; - S390_lowcore.user_asce = S390_lowcore.kernel_asce; + S390_lowcore.user_asce = s390_invalid_asce; crst_table_init((unsigned long *) init_mm.pgd, pgd_type); vmem_map_init(); kasan_copy_shadow_mapping(); /* enable virtual mapping in kernel mode */ __ctl_load(S390_lowcore.kernel_asce, 1, 1); - __ctl_load(S390_lowcore.kernel_asce, 7, 7); + __ctl_load(S390_lowcore.user_asce, 7, 7); __ctl_load(S390_lowcore.kernel_asce, 13, 13); psw.mask = __extract_psw(); psw_bits(psw).dat = 1; -- cgit v1.2.3-70-g09d2 From 062e527956d05fae02f143c0d5ff9e8525c6799f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 16 Nov 2020 08:06:41 +0100 Subject: s390/mm: add debug user asce support Verify on exit to user space that always - the primary ASCE (cr1) is set to kernel ASCE - the secondary ASCE (cr7) is set to user ASCE If this is not the case: panic since something went terribly wrong. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/Kconfig.debug | 8 ++++++++ arch/s390/configs/debug_defconfig | 1 + arch/s390/include/asm/uaccess.h | 2 ++ arch/s390/kernel/entry.S | 8 ++++++++ arch/s390/lib/uaccess.c | 16 ++++++++++++++++ 5 files changed, 35 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index ab48b694ade8..6bfaceebbbc0 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -5,3 +5,11 @@ config TRACE_IRQFLAGS_SUPPORT config EARLY_PRINTK def_bool y + +config DEBUG_USER_ASCE + bool "Debug User ASCE" + help + Check on exit to user space that address space control + elements are setup correctly. + + If unsure, say N. diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index fe6f529ac82c..c52113a238b1 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -826,6 +826,7 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y +CONFIG_DEBUG_USER_ASCE=y CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index e59fd96a1561..c6707885e7c2 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -18,6 +18,8 @@ #include #include +void debug_user_asce(void); + static inline int __range_ok(unsigned long addr, unsigned long size) { return 1; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index d43ef46dc683..377f75616693 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -90,6 +90,12 @@ _LPP_OFFSET = __LC_LPP #endif .endm + .macro DEBUG_USER_ASCE +#ifdef CONFIG_DEBUG_USER_ASCE + brasl %r14,debug_user_asce +#endif + .endm + .macro CHECK_VMAP_STACK savearea,oklabel #ifdef CONFIG_VMAP_STACK lgr %r14,%r15 @@ -428,6 +434,7 @@ ENTRY(system_call) jnz .Lsysc_work TSTMSK __TI_flags(%r12),_TIF_WORK jnz .Lsysc_work # check for work + DEBUG_USER_ASCE lctlg %c1,%c1,__LC_USER_ASCE BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP TSTMSK __LC_CPU_FLAGS, _CIF_FPU @@ -793,6 +800,7 @@ ENTRY(io_int_handler) mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) tm __PT_PSW+1(%r11),0x01 # returning to user ? jno .Lio_exit_kernel + DEBUG_USER_ASCE lctlg %c1,%c1,__LC_USER_ASCE BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP stpt __LC_EXIT_TIMER diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 0ffbe1fad72a..e8f642446fed 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -16,6 +16,22 @@ #include #include +#ifdef CONFIG_DEBUG_USER_ASCE +void debug_user_asce(void) +{ + unsigned long cr1, cr7; + + __ctl_store(cr1, 1, 1); + __ctl_store(cr7, 7, 7); + if (cr1 == S390_lowcore.kernel_asce && cr7 == S390_lowcore.user_asce) + return; + panic("incorrect ASCE on kernel exit\n" + "cr1: %016lx cr7: %016lx\n" + "kernel: %016llx user: %016llx\n", + cr1, cr7, S390_lowcore.kernel_asce, S390_lowcore.user_asce); +} +#endif /*CONFIG_DEBUG_USER_ASCE */ + #ifndef CONFIG_HAVE_MARCH_Z10_FEATURES static DEFINE_STATIC_KEY_FALSE(have_mvcos); -- cgit v1.2.3-70-g09d2 From 80f06306240e0ad1c75116111be11950474dfda7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 16 Nov 2020 08:06:41 +0100 Subject: s390/vdso: reimplement getcpu vdso syscall Implement the previously removed getcpu vdso syscall by using the TOD programmable field to pass the cpu number to user space. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/include/asm/timex.h | 7 +++++++ arch/s390/include/asm/vdso.h | 2 ++ arch/s390/kernel/smp.c | 2 ++ arch/s390/kernel/vdso.c | 7 +++++++ arch/s390/kernel/vdso64/Makefile | 3 ++- arch/s390/kernel/vdso64/getcpu.c | 20 ++++++++++++++++++++ arch/s390/kernel/vdso64/vdso64.lds.S | 2 +- arch/s390/kernel/vdso64/vdso_user_wrapper.S | 1 + 8 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 arch/s390/kernel/vdso64/getcpu.c (limited to 'arch/s390') diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 289aaff4d365..c8e244ecdfde 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -49,6 +49,13 @@ static inline void set_clock_comparator(__u64 time) asm volatile("sckc %0" : : "Q" (time)); } +static inline void set_tod_programmable_field(u16 val) +{ + register unsigned long reg0 asm("0") = val; + + asm volatile("sckpf" : : "d" (reg0)); +} + void clock_comparator_work(void); void __init time_early_init(void); diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 9b299c05abf1..f65590889054 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -14,5 +14,7 @@ extern struct vdso_data *vdso_data; +void vdso_getcpu_init(void); + #endif /* __ASSEMBLY__ */ #endif /* __S390_VDSO_H__ */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 7f7d81f19292..647226e50c80 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -54,6 +54,7 @@ #include #include #include +#include #include "entry.h" enum { @@ -858,6 +859,7 @@ static void smp_init_secondary(void) preempt_disable(); init_cpu_timer(); vtime_init(); + vdso_getcpu_init(); pfault_init(); notify_cpu_starting(cpu); if (topology_cpu_dedicated(cpu)) diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 53c983f1ea51..aef2edff9959 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -29,6 +29,7 @@ #include #include #include +#include extern char vdso64_start, vdso64_end; static void *vdso64_kbase = &vdso64_start; @@ -100,6 +101,11 @@ static union { } vdso_data_store __page_aligned_data; struct vdso_data *vdso_data = (struct vdso_data *)&vdso_data_store.data; +void vdso_getcpu_init(void) +{ + set_tod_programmable_field(smp_processor_id()); +} + /* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree @@ -170,6 +176,7 @@ static int __init vdso_init(void) { int i; + vdso_getcpu_init(); /* Calculate the size of the 64 bit vDSO */ vdso64_pages = ((&vdso64_end - &vdso64_start + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index b0cf58ae82fe..a6e0fb6b91d6 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -7,7 +7,8 @@ ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT include $(srctree)/lib/vdso/Makefile obj-vdso64 = vdso_user_wrapper.o note.o -obj-cvdso64 = vdso64_generic.o +obj-cvdso64 = vdso64_generic.o getcpu.o +CFLAGS_REMOVE_getcpu.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) CFLAGS_REMOVE_vdso64_generic.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) # Build rules diff --git a/arch/s390/kernel/vdso64/getcpu.c b/arch/s390/kernel/vdso64/getcpu.c new file mode 100644 index 000000000000..a5da7a9eb43d --- /dev/null +++ b/arch/s390/kernel/vdso64/getcpu.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright IBM Corp. 2020 */ + +#include +#include +#include + +int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) +{ + __u16 todval[8]; + + /* CPU number is stored in the programmable field of the TOD clock */ + get_tod_clock_ext((char *)todval); + if (cpu) + *cpu = todval[7]; + /* NUMA node is always zero */ + if (node) + *node = 0; + return 0; +} diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index b59006584a9d..7bde3909290f 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -135,7 +135,7 @@ VERSION __kernel_gettimeofday; __kernel_clock_gettime; __kernel_clock_getres; - + __kernel_getcpu; local: *; }; } diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S index a775d7e52872..f773505c7e63 100644 --- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S +++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S @@ -36,3 +36,4 @@ __kernel_\func: vdso_func gettimeofday vdso_func clock_getres vdso_func clock_gettime +vdso_func getcpu -- cgit v1.2.3-70-g09d2 From 10e59217479d733ff50b6e8c6316ecffe8b857cb Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 19 Nov 2020 11:38:46 +0100 Subject: s390/Kconfig: default PCI_NR_FUNCTIONS to 512 With the addition of more complete SR-IOV support, we are recommending to raise this limit for distributions to 512, as the previous default of 128 can easily be hit with just the VFs of a single PCI physical function. With at least one distribution now shipping with this, supporting only one fourth as many PCI functions on a default upstream build may lead to confusion and reduced testing of the higher limit so increase the default to 512. Acked-by: Christian Borntraeger Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 64c72b1ecc77..a60cc523d810 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -712,7 +712,7 @@ if PCI config PCI_NR_FUNCTIONS int "Maximum number of PCI functions (1-4096)" range 1 4096 - default "128" + default "512" help This allows you to specify the maximum number of PCI functions which this kernel will support. -- cgit v1.2.3-70-g09d2 From 1ab3001b6efb78e0293f3f02307720f6094db1ae Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 24 Nov 2020 14:02:35 +0100 Subject: s390/vdso: add missing prototypes for vdso functions clang W=1 warns about missing prototypes: >> arch/s390/kernel/vdso64/getcpu.c:8:5: warning: no previous prototype for function '__s390_vdso_getcpu' [-Wmissing-prototypes] int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) ^ Add a local header file in order to get rid of this warnings. Reported-by: kernel test robot Signed-off-by: Heiko Carstens --- arch/s390/kernel/vdso64/getcpu.c | 1 + arch/s390/kernel/vdso64/vdso.h | 14 ++++++++++++++ arch/s390/kernel/vdso64/vdso64_generic.c | 1 + 3 files changed, 16 insertions(+) create mode 100644 arch/s390/kernel/vdso64/vdso.h (limited to 'arch/s390') diff --git a/arch/s390/kernel/vdso64/getcpu.c b/arch/s390/kernel/vdso64/getcpu.c index a5da7a9eb43d..5b2bc7494d5b 100644 --- a/arch/s390/kernel/vdso64/getcpu.c +++ b/arch/s390/kernel/vdso64/getcpu.c @@ -4,6 +4,7 @@ #include #include #include +#include "vdso.h" int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) { diff --git a/arch/s390/kernel/vdso64/vdso.h b/arch/s390/kernel/vdso64/vdso.h new file mode 100644 index 000000000000..34c7a2312f9d --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ARCH_S390_KERNEL_VDSO64_VDSO_H +#define __ARCH_S390_KERNEL_VDSO64_VDSO_H + +#include + +struct getcpu_cache; + +int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused); +int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); +int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); +int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts); + +#endif /* __ARCH_S390_KERNEL_VDSO64_VDSO_H */ diff --git a/arch/s390/kernel/vdso64/vdso64_generic.c b/arch/s390/kernel/vdso64/vdso64_generic.c index a8cef7e4d137..a9aa75643c08 100644 --- a/arch/s390/kernel/vdso64/vdso64_generic.c +++ b/arch/s390/kernel/vdso64/vdso64_generic.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "../../../../lib/vdso/gettimeofday.c" +#include "vdso.h" int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) -- cgit v1.2.3-70-g09d2 From 8cc0dcfdc1c0e0be107d0288f9c0cf1f4201be62 Mon Sep 17 00:00:00 2001 From: Vineeth Vijayan Date: Fri, 20 Nov 2020 09:36:38 +0100 Subject: s390/cio: remove pm support from ccw bus driver As part of removing broken pm-support from s390 arch, remove the pm callbacks from ccw-bus driver.The power-management functions are unused since the 'commit 394216275c7d ("s390: remove broken hibernate / power management support")'. Signed-off-by: Vineeth Vijayan Reviewed-by: Peter Oberparleiter Signed-off-by: Heiko Carstens --- arch/s390/include/asm/ccwdev.h | 10 -- drivers/s390/cio/cmf.c | 5 - drivers/s390/cio/device.c | 247 +---------------------------------------- drivers/s390/cio/device.h | 1 - drivers/s390/cio/device_fsm.c | 6 - drivers/s390/cio/io_sch.h | 1 - 6 files changed, 2 insertions(+), 268 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index 069709b8e9e7..e3e2ab0acf83 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -124,11 +124,6 @@ enum uc_todo { * @notify: notify driver of device state changes * @path_event: notify driver of channel path events * @shutdown: called at device shutdown - * @prepare: prepare for pm state transition - * @complete: undo work done in @prepare - * @freeze: callback for freezing during hibernation snapshotting - * @thaw: undo work done in @freeze - * @restore: callback for restoring after hibernation * @uc_handler: callback for unit check handler * @driver: embedded device driver structure * @int_class: interruption class to use for accounting interrupts @@ -142,11 +137,6 @@ struct ccw_driver { int (*notify) (struct ccw_device *, int); void (*path_event) (struct ccw_device *, int *); void (*shutdown) (struct ccw_device *); - int (*prepare) (struct ccw_device *); - void (*complete) (struct ccw_device *); - int (*freeze)(struct ccw_device *); - int (*thaw) (struct ccw_device *); - int (*restore)(struct ccw_device *); enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *); struct device_driver driver; enum interruption_class int_class; diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index 72dd2471ec1e..b7b590646d58 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -1109,11 +1109,6 @@ static ssize_t cmb_enable_store(struct device *dev, } DEVICE_ATTR_RW(cmb_enable); -int ccw_set_cmf(struct ccw_device *cdev, int enable) -{ - return cmbops->set(cdev, enable ? 2 : 0); -} - /** * enable_cmf() - switch on the channel measurement for a specific device * @cdev: The ccw device to be enabled diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 9b8ebb29e007..5fbc549786ab 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1408,7 +1408,7 @@ static enum io_sch_action sch_get_action(struct subchannel *sch) } if (device_is_disconnected(cdev)) return IO_SCH_REPROBE; - if (cdev->online && !cdev->private->flags.resuming) + if (cdev->online) return IO_SCH_VERIFY; if (cdev->private->state == DEV_STATE_NOT_OPER) return IO_SCH_UNREG_ATTACH; @@ -1500,11 +1500,6 @@ static int io_subchannel_sch_event(struct subchannel *sch, int process) break; case IO_SCH_UNREG_ATTACH: spin_lock_irqsave(sch->lock, flags); - if (cdev->private->flags.resuming) { - /* Device will be handled later. */ - rc = 0; - goto out_unlock; - } sch_set_cdev(sch, NULL); spin_unlock_irqrestore(sch->lock, flags); /* Unregister ccw device. */ @@ -1517,7 +1512,7 @@ static int io_subchannel_sch_event(struct subchannel *sch, int process) switch (action) { case IO_SCH_ORPH_UNREG: case IO_SCH_UNREG: - if (!cdev || !cdev->private->flags.resuming) + if (!cdev) css_sch_device_unregister(sch); break; case IO_SCH_ORPH_ATTACH: @@ -1676,14 +1671,6 @@ void ccw_device_wait_idle(struct ccw_device *cdev) udelay_simple(100); } } - -static int ccw_device_pm_restore(struct device *dev); - -int ccw_device_force_console(struct ccw_device *cdev) -{ - return ccw_device_pm_restore(&cdev->dev); -} -EXPORT_SYMBOL_GPL(ccw_device_force_console); #endif /** @@ -1784,235 +1771,6 @@ static void ccw_device_shutdown(struct device *dev) __disable_cmf(cdev); } -static int ccw_device_pm_prepare(struct device *dev) -{ - struct ccw_device *cdev = to_ccwdev(dev); - - if (work_pending(&cdev->private->todo_work)) - return -EAGAIN; - /* Fail while device is being set online/offline. */ - if (atomic_read(&cdev->private->onoff)) - return -EAGAIN; - - if (cdev->online && cdev->drv && cdev->drv->prepare) - return cdev->drv->prepare(cdev); - - return 0; -} - -static void ccw_device_pm_complete(struct device *dev) -{ - struct ccw_device *cdev = to_ccwdev(dev); - - if (cdev->online && cdev->drv && cdev->drv->complete) - cdev->drv->complete(cdev); -} - -static int ccw_device_pm_freeze(struct device *dev) -{ - struct ccw_device *cdev = to_ccwdev(dev); - struct subchannel *sch = to_subchannel(cdev->dev.parent); - int ret, cm_enabled; - - /* Fail suspend while device is in transistional state. */ - if (!dev_fsm_final_state(cdev)) - return -EAGAIN; - if (!cdev->online) - return 0; - if (cdev->drv && cdev->drv->freeze) { - ret = cdev->drv->freeze(cdev); - if (ret) - return ret; - } - - spin_lock_irq(sch->lock); - cm_enabled = cdev->private->cmb != NULL; - spin_unlock_irq(sch->lock); - if (cm_enabled) { - /* Don't have the css write on memory. */ - ret = ccw_set_cmf(cdev, 0); - if (ret) - return ret; - } - /* From here on, disallow device driver I/O. */ - spin_lock_irq(sch->lock); - ret = cio_disable_subchannel(sch); - spin_unlock_irq(sch->lock); - - return ret; -} - -static int ccw_device_pm_thaw(struct device *dev) -{ - struct ccw_device *cdev = to_ccwdev(dev); - struct subchannel *sch = to_subchannel(cdev->dev.parent); - int ret, cm_enabled; - - if (!cdev->online) - return 0; - - spin_lock_irq(sch->lock); - /* Allow device driver I/O again. */ - ret = cio_enable_subchannel(sch, (u32)(addr_t)sch); - cm_enabled = cdev->private->cmb != NULL; - spin_unlock_irq(sch->lock); - if (ret) - return ret; - - if (cm_enabled) { - ret = ccw_set_cmf(cdev, 1); - if (ret) - return ret; - } - - if (cdev->drv && cdev->drv->thaw) - ret = cdev->drv->thaw(cdev); - - return ret; -} - -static void __ccw_device_pm_restore(struct ccw_device *cdev) -{ - struct subchannel *sch = to_subchannel(cdev->dev.parent); - - spin_lock_irq(sch->lock); - if (cio_is_console(sch->schid)) { - cio_enable_subchannel(sch, (u32)(addr_t)sch); - goto out_unlock; - } - /* - * While we were sleeping, devices may have gone or become - * available again. Kick re-detection. - */ - cdev->private->flags.resuming = 1; - cdev->private->path_new_mask = LPM_ANYPATH; - css_sched_sch_todo(sch, SCH_TODO_EVAL); - spin_unlock_irq(sch->lock); - css_wait_for_slow_path(); - - /* cdev may have been moved to a different subchannel. */ - sch = to_subchannel(cdev->dev.parent); - spin_lock_irq(sch->lock); - if (cdev->private->state != DEV_STATE_ONLINE && - cdev->private->state != DEV_STATE_OFFLINE) - goto out_unlock; - - ccw_device_recognition(cdev); - spin_unlock_irq(sch->lock); - wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev) || - cdev->private->state == DEV_STATE_DISCONNECTED); - spin_lock_irq(sch->lock); - -out_unlock: - cdev->private->flags.resuming = 0; - spin_unlock_irq(sch->lock); -} - -static int resume_handle_boxed(struct ccw_device *cdev) -{ - cdev->private->state = DEV_STATE_BOXED; - if (ccw_device_notify(cdev, CIO_BOXED) == NOTIFY_OK) - return 0; - ccw_device_sched_todo(cdev, CDEV_TODO_UNREG); - return -ENODEV; -} - -static int resume_handle_disc(struct ccw_device *cdev) -{ - cdev->private->state = DEV_STATE_DISCONNECTED; - if (ccw_device_notify(cdev, CIO_GONE) == NOTIFY_OK) - return 0; - ccw_device_sched_todo(cdev, CDEV_TODO_UNREG); - return -ENODEV; -} - -static int ccw_device_pm_restore(struct device *dev) -{ - struct ccw_device *cdev = to_ccwdev(dev); - struct subchannel *sch; - int ret = 0; - - __ccw_device_pm_restore(cdev); - sch = to_subchannel(cdev->dev.parent); - spin_lock_irq(sch->lock); - if (cio_is_console(sch->schid)) - goto out_restore; - - /* check recognition results */ - switch (cdev->private->state) { - case DEV_STATE_OFFLINE: - case DEV_STATE_ONLINE: - cdev->private->flags.donotify = 0; - break; - case DEV_STATE_BOXED: - ret = resume_handle_boxed(cdev); - if (ret) - goto out_unlock; - goto out_restore; - default: - ret = resume_handle_disc(cdev); - if (ret) - goto out_unlock; - goto out_restore; - } - /* check if the device type has changed */ - if (!ccw_device_test_sense_data(cdev)) { - ccw_device_update_sense_data(cdev); - ccw_device_sched_todo(cdev, CDEV_TODO_REBIND); - ret = -ENODEV; - goto out_unlock; - } - if (!cdev->online) - goto out_unlock; - - if (ccw_device_online(cdev)) { - ret = resume_handle_disc(cdev); - if (ret) - goto out_unlock; - goto out_restore; - } - spin_unlock_irq(sch->lock); - wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev)); - spin_lock_irq(sch->lock); - - if (ccw_device_notify(cdev, CIO_OPER) == NOTIFY_BAD) { - ccw_device_sched_todo(cdev, CDEV_TODO_UNREG); - ret = -ENODEV; - goto out_unlock; - } - - /* reenable cmf, if needed */ - if (cdev->private->cmb) { - spin_unlock_irq(sch->lock); - ret = ccw_set_cmf(cdev, 1); - spin_lock_irq(sch->lock); - if (ret) { - CIO_MSG_EVENT(2, "resume: cdev 0.%x.%04x: cmf failed " - "(rc=%d)\n", cdev->private->dev_id.ssid, - cdev->private->dev_id.devno, ret); - ret = 0; - } - } - -out_restore: - spin_unlock_irq(sch->lock); - if (cdev->online && cdev->drv && cdev->drv->restore) - ret = cdev->drv->restore(cdev); - return ret; - -out_unlock: - spin_unlock_irq(sch->lock); - return ret; -} - -static const struct dev_pm_ops ccw_pm_ops = { - .prepare = ccw_device_pm_prepare, - .complete = ccw_device_pm_complete, - .freeze = ccw_device_pm_freeze, - .thaw = ccw_device_pm_thaw, - .restore = ccw_device_pm_restore, -}; - static struct bus_type ccw_bus_type = { .name = "ccw", .match = ccw_bus_match, @@ -2020,7 +1778,6 @@ static struct bus_type ccw_bus_type = { .probe = ccw_device_probe, .remove = ccw_device_remove, .shutdown = ccw_device_shutdown, - .pm = &ccw_pm_ops, }; /** diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h index 853b6a8ca095..24b2fce69590 100644 --- a/drivers/s390/cio/device.h +++ b/drivers/s390/cio/device.h @@ -143,6 +143,5 @@ void retry_set_schib(struct ccw_device *cdev); void cmf_retry_copy_block(struct ccw_device *); int cmf_reenable(struct ccw_device *); void cmf_reactivate(void); -int ccw_set_cmf(struct ccw_device *cdev, int enable); extern struct device_attribute dev_attr_cmb_enable; #endif diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 8fc267324ebb..6420b197bb05 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -224,12 +224,6 @@ ccw_device_recog_done(struct ccw_device *cdev, int state) wake_up(&cdev->private->wait_q); return; } - if (cdev->private->flags.resuming) { - cdev->private->state = state; - cdev->private->flags.recog_done = 1; - wake_up(&cdev->private->wait_q); - return; - } switch (state) { case DEV_STATE_NOT_OPER: break; diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h index c03b4a19974e..85a11c1836e5 100644 --- a/drivers/s390/cio/io_sch.h +++ b/drivers/s390/cio/io_sch.h @@ -160,7 +160,6 @@ struct ccw_device_private { unsigned int donotify:1; /* call notify function */ unsigned int recog_done:1; /* dev. recog. complete */ unsigned int fake_irb:2; /* deliver faked irb */ - unsigned int resuming:1; /* recognition while resume */ unsigned int pgroup:1; /* pathgroup is set up */ unsigned int mpath:1; /* multipathing is set up */ unsigned int pgid_unknown:1;/* unknown pgid state */ -- cgit v1.2.3-70-g09d2 From 613775d62ec60202f98d2c5f520e6e9ba6dd4ac4 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Thu, 26 Nov 2020 18:31:08 +0100 Subject: s390/kexec_file: fix diag308 subcode when loading crash kernel diag308 subcode 0 performes a clear reset which inlcudes the reset of all registers in the system. While this is the preferred behavior when loading a normal kernel via kexec it prevents the crash kernel to store the register values in the dump. To prevent this use subcode 1 when loading a crash kernel instead. Fixes: ee337f5469fd ("s390/kexec_file: Add crash support to image loader") Cc: # 4.17 Signed-off-by: Philipp Rudo Reported-by: Xiaoying Yan Tested-by: Lianbo Jiang Signed-off-by: Heiko Carstens --- arch/s390/purgatory/head.S | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S index 5a10ce34b95d..3d1c31e0cf3d 100644 --- a/arch/s390/purgatory/head.S +++ b/arch/s390/purgatory/head.S @@ -62,14 +62,15 @@ jh 10b .endm -.macro START_NEXT_KERNEL base +.macro START_NEXT_KERNEL base subcode lg %r4,kernel_entry-\base(%r13) lg %r5,load_psw_mask-\base(%r13) ogr %r4,%r5 stg %r4,0(%r0) xgr %r0,%r0 - diag %r0,%r0,0x308 + lghi %r1,\subcode + diag %r0,%r1,0x308 .endm .text @@ -123,7 +124,7 @@ ENTRY(purgatory_start) je .start_crash_kernel /* start normal kernel */ - START_NEXT_KERNEL .base_crash + START_NEXT_KERNEL .base_crash 0 .return_old_kernel: lmg %r6,%r15,gprregs-.base_crash(%r13) @@ -227,7 +228,7 @@ ENTRY(purgatory_start) MEMCPY %r9,%r10,%r11 /* start crash kernel */ - START_NEXT_KERNEL .base_dst + START_NEXT_KERNEL .base_dst 1 load_psw_mask: -- cgit v1.2.3-70-g09d2 From 7a84ffc0471f0ff59f9ae10807a77bd666e24fd0 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Wed, 25 Nov 2020 19:08:23 +0100 Subject: s390/boot: add build-id to decompressor More and more functionality from the early boot phase gets carried over to the decompressor. With this the complexity of the code and thus the chance to introduce bugs increases. In order to be able to debug these early boot bugs the distributions have to package the decompressors vmlinux together with the other debuginfos. However for that the distributions require the vmlinux to contain a build-id. Per default the section containing the build-id is placed first in the section table. So make sure to move it behind the .text section otherwise the image would be unbootable. Signed-off-by: Philipp Rudo Signed-off-by: Heiko Carstens --- arch/s390/boot/compressed/Makefile | 2 +- arch/s390/boot/compressed/vmlinux.lds.S | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index e0502222706a..de18dab518bb 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -23,7 +23,7 @@ OBJCOPYFLAGS := OBJECTS := $(addprefix $(obj)/,$(obj-y)) OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all)) -LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T +LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup --build-id=sha1 -T $(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS_ALL) FORCE $(call if_changed,ld) diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index 152ac6f875c0..27a09c1c78f6 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -27,6 +27,7 @@ SECTIONS *(.rodata.*) _erodata = . ; } + NOTES .data : { _data = . ; *(.data) -- cgit v1.2.3-70-g09d2 From e259b3fafa7de362b04ecd86e7fa9a9e9273e5fb Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 3 Dec 2020 22:03:32 +0100 Subject: s390/idle: add missing mt_cycles calculation During removal of the critical section cleanup the calculation of mt_cycles during idle was removed. This causes invalid accounting on systems with SMT enabled. Fixes: 0b0ed657fe00 ("s390: remove critical section cleanup from entry.S") Cc: # 5.8 Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 377f75616693..e7f7aab805e1 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -118,7 +118,7 @@ _LPP_OFFSET = __LC_LPP .macro SWITCH_ASYNC savearea,timer tmhh %r8,0x0001 # interrupting from user ? - jnz 2f + jnz 4f #if IS_ENABLED(CONFIG_KVM) lgr %r14,%r9 larl %r13,.Lsie_gmap @@ -131,9 +131,25 @@ _LPP_OFFSET = __LC_LPP #endif 0: larl %r13,.Lpsw_idle_exit cgr %r13,%r9 - jne 1f + jne 3f - mvc __CLOCK_IDLE_EXIT(8,%r2), __LC_INT_CLOCK + larl %r1,smp_cpu_mtid + llgf %r1,0(%r1) + ltgr %r1,%r1 + jz 2f # no SMT, skip mt_cycles calculation + .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15) + larl %r3,mt_cycles + ag %r3,__LC_PERCPU_OFFSET + la %r4,__SF_EMPTY+16(%r15) +1: lg %r0,0(%r3) + slg %r0,0(%r4) + alg %r0,64(%r4) + stg %r0,0(%r3) + la %r3,8(%r3) + la %r4,8(%r4) + brct %r1,1b + +2: mvc __CLOCK_IDLE_EXIT(8,%r2), __LC_INT_CLOCK mvc __TIMER_IDLE_EXIT(8,%r2), __LC_ASYNC_ENTER_TIMER # account system time going idle ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT @@ -152,17 +168,17 @@ _LPP_OFFSET = __LC_LPP mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) nihh %r8,0xfcfd # clear wait state and irq bits -1: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? +3: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? slgr %r14,%r15 srag %r14,%r14,STACK_SHIFT - jnz 3f + jnz 5f CHECK_STACK \savearea aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 4f -2: UPDATE_VTIME %r14,%r15,\timer + j 6f +4: UPDATE_VTIME %r14,%r15,\timer BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP -3: lg %r15,__LC_ASYNC_STACK # load async stack -4: la %r11,STACK_FRAME_OVERHEAD(%r15) +5: lg %r15,__LC_ASYNC_STACK # load async stack +6: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm .macro UPDATE_VTIME w1,w2,enter_timer -- cgit v1.2.3-70-g09d2 From 454efcf82ea17d7efeb86ebaa20775a21ec87d27 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Sun, 6 Dec 2020 10:47:47 +0100 Subject: s390/idle: fix accounting with machine checks When a machine check interrupt is triggered during idle, the code is using the async timer/clock for idle time calculation. It should use the machine check enter timer/clock which is passed to the macro. Fixes: 0b0ed657fe00 ("s390: remove critical section cleanup from entry.S") Cc: # 5.8 Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index e7f7aab805e1..55f65f2b7daf 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -116,7 +116,7 @@ _LPP_OFFSET = __LC_LPP #endif .endm - .macro SWITCH_ASYNC savearea,timer + .macro SWITCH_ASYNC savearea,timer,clock tmhh %r8,0x0001 # interrupting from user ? jnz 4f #if IS_ENABLED(CONFIG_KVM) @@ -149,8 +149,8 @@ _LPP_OFFSET = __LC_LPP la %r4,8(%r4) brct %r1,1b -2: mvc __CLOCK_IDLE_EXIT(8,%r2), __LC_INT_CLOCK - mvc __TIMER_IDLE_EXIT(8,%r2), __LC_ASYNC_ENTER_TIMER +2: mvc __CLOCK_IDLE_EXIT(8,%r2), \clock + mvc __TIMER_IDLE_EXIT(8,%r2), \timer # account system time going idle ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT @@ -757,7 +757,7 @@ ENTRY(io_int_handler) stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT lmg %r8,%r9,__LC_IO_OLD_PSW - SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use xgr %r0,%r0 @@ -952,7 +952,7 @@ ENTRY(ext_int_handler) stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT lmg %r8,%r9,__LC_EXT_OLD_PSW - SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use xgr %r0,%r0 @@ -1183,7 +1183,7 @@ ENTRY(mcck_int_handler) TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID jno .Lmcck_panic 4: ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off - SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER + SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER,__LC_MCCK_CLOCK .Lmcck_skip: lghi %r14,__LC_GPREGS_SAVE_AREA+64 stmg %r0,%r7,__PT_R0(%r11) -- cgit v1.2.3-70-g09d2 From b4d70a6134d2152d692ccc873ff6fa3351631927 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 7 Dec 2020 20:56:38 +0100 Subject: s390/mm: use invalid asce for user space when switching to init_mm Currently only idle_task_exit() explicitly switches (switch_mm) to init_mm. This causes the kernel asce to be loaded into cr7 and therefore it would be used for potential user space accesses. This is currently no problem since idle_task_exit() is nearly the last thing a CPU executes before it is taken down. However things might change - and therefore make sure that always the invalid asce is used for cr7 when active_mm is init_mm. This makes sure that all potential user space accesses will fail, instead of accessing kernel address space. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/include/asm/mmu_context.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 87a84fc59fc3..5dc49c467319 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -76,7 +76,10 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, { int cpu = smp_processor_id(); - S390_lowcore.user_asce = next->context.asce; + if (next == &init_mm) + S390_lowcore.user_asce = s390_invalid_asce; + else + S390_lowcore.user_asce = next->context.asce; cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); /* Clear previous user-ASCE from CR7 */ __ctl_load(s390_invalid_asce, 7, 7); -- cgit v1.2.3-70-g09d2 From b5e438ebd7e808d1d2435159ac4742e01a94b8da Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 8 Dec 2020 07:35:21 +0100 Subject: s390/smp: perform initial CPU reset also for SMT siblings Not resetting the SMT siblings might leave them in unpredictable state. One of the observed problems was that the CPU timer wasn't reset and therefore large system time values where accounted during CPU bringup. Cc: # 4.0 Fixes: 10ad34bc76dfb ("s390: add SMT support") Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/kernel/smp.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 647226e50c80..27c763014114 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -890,24 +890,12 @@ static void __no_sanitize_address smp_start_secondary(void *cpuvoid) /* Upping and downing of CPUs */ int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - struct pcpu *pcpu; - int base, i, rc; + struct pcpu *pcpu = pcpu_devices + cpu; + int rc; - pcpu = pcpu_devices + cpu; if (pcpu->state != CPU_STATE_CONFIGURED) return -EIO; - base = smp_get_base_cpu(cpu); - for (i = 0; i <= smp_cpu_mtid; i++) { - if (base + i < nr_cpu_ids) - if (cpu_online(base + i)) - break; - } - /* - * If this is the first CPU of the core to get online - * do an initial CPU reset. - */ - if (i > smp_cpu_mtid && - pcpu_sigp_retry(pcpu_devices + base, SIGP_INITIAL_CPU_RESET, 0) != + if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; -- cgit v1.2.3-70-g09d2 From ff98cc986ae883eec5f26af72d4e2406612fe683 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 3 Dec 2020 15:02:08 +0100 Subject: s390/crypto: add arch_get_random_long() support The random longs to be pulled by arch_get_random_long() are prepared in an 4K buffer which is filled from the NIST 800-90 compliant s390 drbg. By default the random long buffer is refilled 256 times before the drbg itself needs a reseed. The reseed of the drbg is done with 32 bytes fetched from the high quality (but slow) trng which is assumed to deliver 100% entropy. So the 32 * 8 = 256 bits of entropy are spread over 256 * 4KB = 1MB serving 131072 arch_get_random_long() invocations before reseeded. How often the 4K random long buffer is refilled with the drbg before the drbg is reseeded can be adjusted. There is a module parameter 's390_arch_rnd_long_drbg_reseed' accessible via /sys/module/arch_random/parameters/rndlong_drbg_reseed or as kernel command line parameter arch_random.rndlong_drbg_reseed= This parameter tells how often the drbg fills the 4K buffer before it is re-seeded by fresh entropy from the trng. A value of 16 results in reseeding the drbg at every 16 * 4 KB = 64 KB with 32 bytes of fresh entropy pulled from the trng. So a value of 16 would result in 256 bits entropy per 64 KB. A value of 256 results in 1MB of drbg output before a reseed of the drbg is done. So this would spread the 256 bits of entropy among 1MB. Setting this parameter to 0 forces the reseed to take place every time the 4K buffer is depleted, so the entropy rises to 256 bits entropy per 4K or 0.5 bit entropy per arch_get_random_long(). With setting this parameter to negative values all this effort is disabled, arch_get_random long() returns false and thus indicating that the arch_get_random_long() feature is disabled at all. arch_get_random_long() is used by random.c among others to provide an initial hash value to be mixed with the entropy pool on every random data pull. For about 64 bytes read from /dev/urandom there is one call to arch_get_random_long(). So these additional random long values count for performance of /dev/urandom with measurable but low penalty. Signed-off-by: Harald Freudenberger Reviewed-by: Ingo Franzki Reviewed-by: Juergen Christ Signed-off-by: Heiko Carstens --- arch/s390/crypto/arch_random.c | 110 ++++++++++++++++++++++++++++++++++++- arch/s390/include/asm/archrandom.h | 5 +- 2 files changed, 113 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index dd95cdbd22ce..7b947728d57e 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -2,7 +2,7 @@ /* * s390 arch random implementation. * - * Copyright IBM Corp. 2017, 2018 + * Copyright IBM Corp. 2017, 2020 * Author(s): Harald Freudenberger * * The s390_arch_random_generate() function may be called from random.c @@ -33,6 +33,7 @@ #include #include #include +#include #include DEFINE_STATIC_KEY_FALSE(s390_arch_random_available); @@ -99,6 +100,113 @@ static void arch_rng_refill_buffer(struct work_struct *unused) queue_delayed_work(system_long_wq, &arch_rng_work, delay); } +/* + * Here follows the implementation of s390_arch_get_random_long(). + * + * The random longs to be pulled by arch_get_random_long() are + * prepared in an 4K buffer which is filled from the NIST 800-90 + * compliant s390 drbg. By default the random long buffer is refilled + * 256 times before the drbg itself needs a reseed. The reseed of the + * drbg is done with 32 bytes fetched from the high quality (but slow) + * trng which is assumed to deliver 100% entropy. So the 32 * 8 = 256 + * bits of entropy are spread over 256 * 4KB = 1MB serving 131072 + * arch_get_random_long() invocations before reseeded. + * + * How often the 4K random long buffer is refilled with the drbg + * before the drbg is reseeded can be adjusted. There is a module + * parameter 's390_arch_rnd_long_drbg_reseed' accessible via + * /sys/module/arch_random/parameters/rndlong_drbg_reseed + * or as kernel command line parameter + * arch_random.rndlong_drbg_reseed= + * This parameter tells how often the drbg fills the 4K buffer before + * it is re-seeded by fresh entropy from the trng. + * A value of 16 results in reseeding the drbg at every 16 * 4 KB = 64 + * KB with 32 bytes of fresh entropy pulled from the trng. So a value + * of 16 would result in 256 bits entropy per 64 KB. + * A value of 256 results in 1MB of drbg output before a reseed of the + * drbg is done. So this would spread the 256 bits of entropy among 1MB. + * Setting this parameter to 0 forces the reseed to take place every + * time the 4K buffer is depleted, so the entropy rises to 256 bits + * entropy per 4K or 0.5 bit entropy per arch_get_random_long(). With + * setting this parameter to negative values all this effort is + * disabled, arch_get_random long() returns false and thus indicating + * that the arch_get_random_long() feature is disabled at all. + */ + +static unsigned long rndlong_buf[512]; +static DEFINE_SPINLOCK(rndlong_lock); +static int rndlong_buf_index; + +static int rndlong_drbg_reseed = 256; +module_param_named(rndlong_drbg_reseed, rndlong_drbg_reseed, int, 0600); +MODULE_PARM_DESC(rndlong_drbg_reseed, "s390 arch_get_random_long() drbg reseed"); + +static inline void refill_rndlong_buf(void) +{ + static u8 prng_ws[240]; + static int drbg_counter; + + if (--drbg_counter < 0) { + /* need to re-seed the drbg */ + u8 seed[32]; + + /* fetch seed from trng */ + cpacf_trng(NULL, 0, seed, sizeof(seed)); + /* seed drbg */ + memset(prng_ws, 0, sizeof(prng_ws)); + cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, + &prng_ws, NULL, 0, seed, sizeof(seed)); + /* re-init counter for drbg */ + drbg_counter = rndlong_drbg_reseed; + } + + /* fill the arch_get_random_long buffer from drbg */ + cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &prng_ws, + (u8 *) rndlong_buf, sizeof(rndlong_buf), + NULL, 0); +} + +bool s390_arch_get_random_long(unsigned long *v) +{ + bool rc = false; + unsigned long flags; + + /* arch_get_random_long() disabled ? */ + if (rndlong_drbg_reseed < 0) + return false; + + /* try to lock the random long lock */ + if (!spin_trylock_irqsave(&rndlong_lock, flags)) + return false; + + if (--rndlong_buf_index >= 0) { + /* deliver next long value from the buffer */ + *v = rndlong_buf[rndlong_buf_index]; + rc = true; + goto out; + } + + /* buffer is depleted and needs refill */ + if (in_interrupt()) { + /* delay refill in interrupt context to next caller */ + rndlong_buf_index = 0; + goto out; + } + + /* refill random long buffer */ + refill_rndlong_buf(); + rndlong_buf_index = ARRAY_SIZE(rndlong_buf); + + /* and provide one random long */ + *v = rndlong_buf[--rndlong_buf_index]; + rc = true; + +out: + spin_unlock_irqrestore(&rndlong_lock, flags); + return rc; +} +EXPORT_SYMBOL(s390_arch_get_random_long); + static int __init s390_arch_random_init(void) { /* all the needed PRNO subfunctions available ? */ diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h index de61ce562052..5dc712fde3c7 100644 --- a/arch/s390/include/asm/archrandom.h +++ b/arch/s390/include/asm/archrandom.h @@ -2,7 +2,7 @@ /* * Kernel interface for the s390 arch_random_* functions * - * Copyright IBM Corp. 2017 + * Copyright IBM Corp. 2017, 2020 * * Author: Harald Freudenberger * @@ -19,10 +19,13 @@ DECLARE_STATIC_KEY_FALSE(s390_arch_random_available); extern atomic64_t s390_arch_random_counter; +bool s390_arch_get_random_long(unsigned long *v); bool s390_arch_random_generate(u8 *buf, unsigned int nbytes); static inline bool __must_check arch_get_random_long(unsigned long *v) { + if (static_branch_likely(&s390_arch_random_available)) + return s390_arch_get_random_long(v); return false; } -- cgit v1.2.3-70-g09d2 From 343dbdb7cb8997a2cb0fd804d6563b8a6de8d49b Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 8 Dec 2020 19:47:15 +0100 Subject: s390/mm: add support to allocate gigantic hugepages using CMA Commit cf11e85fc08c ("mm: hugetlb: optionally allocate gigantic hugepages using cma") added support for allocating gigantic hugepages using CMA, by specifying the hugetlb_cma= kernel parameter, which will disable any boot-time allocation of gigantic hugepages. This patch enables that option also for s390. Signed-off-by: Gerald Schaefer Signed-off-by: Heiko Carstens --- arch/s390/kernel/setup.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 756936785598..1f16a03be995 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -1146,6 +1147,8 @@ void __init setup_arch(char **cmdline_p) setup_memory(); dma_contiguous_reserve(ident_map_size); vmcp_cma_reserve(); + if (MACHINE_HAS_EDAT2) + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); check_initrd(); reserve_crashkernel(); -- cgit v1.2.3-70-g09d2