summaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-04-28 16:24:32 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-04-28 16:24:32 -0700
commit70cc1b5307e8ee3076fdf2ecbeb89eb973aa0ff7 (patch)
tree6928e81a009668e6af4ca7408db2c35549a0f433 /arch/powerpc/kernel
parent5ea8abf589f2974d65460a1ffa0c303763e958da (diff)
parent169f8997968ab620d750d9a45e15c5288d498356 (diff)
Merge tag 'powerpc-6.4-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman: - Add support for building the kernel using PC-relative addressing on Power10. - Allow HV KVM guests on Power10 to use prefixed instructions. - Unify support for the P2020 CPU (85xx) into a single machine description. - Always build the 64-bit kernel with 128-bit long double. - Drop support for several obsolete 2000's era development boards as identified by Paul Gortmaker. - A series fixing VFIO on Power since some generic changes. - Various other small features and fixes. Thanks to Alexey Kardashevskiy, Andrew Donnellan, Benjamin Gray, Bo Liu, Christophe Leroy, Dan Carpenter, David Binderman, Ira Weiny, Joel Stanley, Kajol Jain, Kautuk Consul, Liang He, Luis Chamberlain, Masahiro Yamada, Michael Neuling, Nathan Chancellor, Nathan Lynch, Nicholas Miehlbradt, Nicholas Piggin, Nick Desaulniers, Nysal Jan K.A, Pali Rohár, Paul Gortmaker, Paul Mackerras, Petr Vaněk, Randy Dunlap, Rob Herring, Sachin Sant, Sean Christopherson, Segher Boessenkool, and Timothy Pearson. * tag 'powerpc-6.4-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (156 commits) powerpc/64s: Disable pcrel code model on Clang powerpc: Fix merge conflict between pcrel and copy_thread changes powerpc/configs/powernv: Add IGB=y powerpc/configs/64s: Drop JFS Filesystem powerpc/configs/64s: Use EXT4 to mount EXT2 filesystems powerpc/configs: Make pseries_defconfig an alias for ppc64le_guest powerpc/configs: Make pseries_le an alias for ppc64le_guest powerpc/configs: Incorporate generic kvm_guest.config into guest configs powerpc/configs: Add IBMVETH=y and IBMVNIC=y to guest configs powerpc/configs/64s: Enable Device Mapper options powerpc/configs/64s: Enable PSTORE powerpc/configs/64s: Enable VLAN support powerpc/configs/64s: Enable BLK_DEV_NVME powerpc/configs/64s: Drop REISERFS powerpc/configs/64s: Use SHA512 for module signatures powerpc/configs/64s: Enable IO_STRICT_DEVMEM powerpc/configs/64s: Enable SCHEDSTATS powerpc/configs/64s: Enable DEBUG_VM & other options powerpc/configs/64s: Enable EMULATED_STATS powerpc/configs/64s: Enable KUNIT and most tests ...
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/asm-offsets.c2
-rw-r--r--arch/powerpc/kernel/btext.c2
-rw-r--r--arch/powerpc/kernel/entry_32.S23
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S112
-rw-r--r--arch/powerpc/kernel/head_64.S133
-rw-r--r--arch/powerpc/kernel/head_booke.h1
-rw-r--r--arch/powerpc/kernel/idle.c10
-rw-r--r--arch/powerpc/kernel/interrupt.c2
-rw-r--r--arch/powerpc/kernel/interrupt_64.S56
-rw-r--r--arch/powerpc/kernel/iommu.c246
-rw-r--r--arch/powerpc/kernel/irq.c8
-rw-r--r--arch/powerpc/kernel/irq_64.c10
-rw-r--r--arch/powerpc/kernel/isa-bridge.c166
-rw-r--r--arch/powerpc/kernel/legacy_serial.c10
-rw-r--r--arch/powerpc/kernel/misc_64.S2
-rw-r--r--arch/powerpc/kernel/module_64.c377
-rw-r--r--arch/powerpc/kernel/paca.c2
-rw-r--r--arch/powerpc/kernel/pci_64.c2
-rw-r--r--arch/powerpc/kernel/process.c126
-rw-r--r--arch/powerpc/kernel/rtas.c54
-rw-r--r--arch/powerpc/kernel/setup-common.c13
-rw-r--r--arch/powerpc/kernel/time.c6
-rw-r--r--arch/powerpc/kernel/trace/ftrace.c50
-rw-r--r--arch/powerpc/kernel/vdso/gettimeofday.S6
-rw-r--r--arch/powerpc/kernel/vector.S6
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S6
26 files changed, 1014 insertions, 417 deletions
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index d24a59a98c0c..9f14d95b8b32 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -185,7 +185,9 @@ int main(void)
offsetof(struct task_struct, thread_info));
OFFSET(PACASAVEDMSR, paca_struct, saved_msr);
OFFSET(PACAR1, paca_struct, saved_r1);
+#ifndef CONFIG_PPC_KERNEL_PCREL
OFFSET(PACATOC, paca_struct, kernel_toc);
+#endif
OFFSET(PACAKBASE, paca_struct, kernelbase);
OFFSET(PACAKMSR, paca_struct, kernel_msr);
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 2769889219bf..19e46fd623b0 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -235,7 +235,7 @@ int __init btext_find_display(int allow_nonstdout)
return rc;
for_each_node_by_type(np, "display") {
- if (of_get_property(np, "linux,opened", NULL)) {
+ if (of_property_read_bool(np, "linux,opened")) {
printk("trying %pOF ...\n", np);
rc = btext_initialize(np);
printk("result: %d\n", rc);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 5604c9a1ac22..47f0dd9a45ad 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -183,12 +183,11 @@ syscall_exit_finish:
ret_from_fork:
REST_NVGPRS(r1)
bl schedule_tail
- li r3,0
+ li r3,0 /* fork() return value */
b ret_from_syscall
- .globl ret_from_kernel_thread
-ret_from_kernel_thread:
- REST_NVGPRS(r1)
+ .globl ret_from_kernel_user_thread
+ret_from_kernel_user_thread:
bl schedule_tail
mtctr r14
mr r3,r15
@@ -197,6 +196,22 @@ ret_from_kernel_thread:
li r3,0
b ret_from_syscall
+ .globl start_kernel_thread
+start_kernel_thread:
+ bl schedule_tail
+ mtctr r14
+ mr r3,r15
+ PPC440EP_ERR42
+ bctrl
+ /*
+ * This must not return. We actually want to BUG here, not WARN,
+ * because BUG will exit the process which is what the kernel thread
+ * should have done, which may give some hope of continuing.
+ */
+100: trap
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0
+
+
/*
* This routine switches between two different tasks. The process
* state of one is saved on its kernel stack. Then the state
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6441a1ba57ac..c33c8ebf8641 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1075,7 +1075,7 @@ EXC_COMMON_BEGIN(system_reset_common)
__GEN_COMMON_BODY system_reset
addi r3,r1,STACK_INT_FRAME_REGS
- bl system_reset_exception
+ bl CFUNC(system_reset_exception)
/* Clear MSR_RI before setting SRR0 and SRR1. */
li r9,0
@@ -1223,9 +1223,9 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
addi r3,r1,STACK_INT_FRAME_REGS
BEGIN_FTR_SECTION
- bl machine_check_early_boot
+ bl CFUNC(machine_check_early_boot)
END_FTR_SECTION(0, 1) // nop out after boot
- bl machine_check_early
+ bl CFUNC(machine_check_early)
std r3,RESULT(r1) /* Save result */
ld r12,_MSR(r1)
@@ -1286,7 +1286,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
* Queue up the MCE event so that we can log it later, while
* returning from kernel or opal call.
*/
- bl machine_check_queue_event
+ bl CFUNC(machine_check_queue_event)
MACHINE_CHECK_HANDLER_WINDUP
RFI_TO_KERNEL
@@ -1312,7 +1312,7 @@ EXC_COMMON_BEGIN(machine_check_common)
*/
GEN_COMMON machine_check
addi r3,r1,STACK_INT_FRAME_REGS
- bl machine_check_exception_async
+ bl CFUNC(machine_check_exception_async)
b interrupt_return_srr
@@ -1322,7 +1322,7 @@ EXC_COMMON_BEGIN(machine_check_common)
* done. Queue the event then call the idle code to do the wake up.
*/
EXC_COMMON_BEGIN(machine_check_idle_common)
- bl machine_check_queue_event
+ bl CFUNC(machine_check_queue_event)
/*
* GPR-loss wakeups are relatively straightforward, because the
@@ -1361,7 +1361,7 @@ EXC_COMMON_BEGIN(unrecoverable_mce)
BEGIN_FTR_SECTION
li r10,0 /* clear MSR_RI */
mtmsrd r10,1
- bl disable_machine_check
+ bl CFUNC(disable_machine_check)
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
ld r10,PACAKMSR(r13)
li r3,MSR_ME
@@ -1378,14 +1378,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
* the early handler which is a true NMI.
*/
addi r3,r1,STACK_INT_FRAME_REGS
- bl machine_check_exception
+ bl CFUNC(machine_check_exception)
/*
* We will not reach here. Even if we did, there is no way out.
* Call unrecoverable_exception and die.
*/
addi r3,r1,STACK_INT_FRAME_REGS
- bl unrecoverable_exception
+ bl CFUNC(unrecoverable_exception)
b .
@@ -1440,16 +1440,16 @@ EXC_COMMON_BEGIN(data_access_common)
bne- 1f
#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
- bl do_hash_fault
+ bl CFUNC(do_hash_fault)
MMU_FTR_SECTION_ELSE
- bl do_page_fault
+ bl CFUNC(do_page_fault)
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
#else
- bl do_page_fault
+ bl CFUNC(do_page_fault)
#endif
b interrupt_return_srr
-1: bl do_break
+1: bl CFUNC(do_break)
/*
* do_break() may have changed the NV GPRS while handling a breakpoint.
* If so, we need to restore them with their updated values.
@@ -1493,7 +1493,7 @@ EXC_COMMON_BEGIN(data_access_slb_common)
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
addi r3,r1,STACK_INT_FRAME_REGS
- bl do_slb_fault
+ bl CFUNC(do_slb_fault)
cmpdi r3,0
bne- 1f
b fast_interrupt_return_srr
@@ -1507,7 +1507,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
#endif
std r3,RESULT(r1)
addi r3,r1,STACK_INT_FRAME_REGS
- bl do_bad_segment_interrupt
+ bl CFUNC(do_bad_segment_interrupt)
b interrupt_return_srr
@@ -1541,12 +1541,12 @@ EXC_COMMON_BEGIN(instruction_access_common)
addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
- bl do_hash_fault
+ bl CFUNC(do_hash_fault)
MMU_FTR_SECTION_ELSE
- bl do_page_fault
+ bl CFUNC(do_page_fault)
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
#else
- bl do_page_fault
+ bl CFUNC(do_page_fault)
#endif
b interrupt_return_srr
@@ -1581,7 +1581,7 @@ EXC_COMMON_BEGIN(instruction_access_slb_common)
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
addi r3,r1,STACK_INT_FRAME_REGS
- bl do_slb_fault
+ bl CFUNC(do_slb_fault)
cmpdi r3,0
bne- 1f
b fast_interrupt_return_srr
@@ -1595,7 +1595,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
#endif
std r3,RESULT(r1)
addi r3,r1,STACK_INT_FRAME_REGS
- bl do_bad_segment_interrupt
+ bl CFUNC(do_bad_segment_interrupt)
b interrupt_return_srr
@@ -1649,7 +1649,7 @@ EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
EXC_COMMON_BEGIN(hardware_interrupt_common)
GEN_COMMON hardware_interrupt
addi r3,r1,STACK_INT_FRAME_REGS
- bl do_IRQ
+ bl CFUNC(do_IRQ)
BEGIN_FTR_SECTION
b interrupt_return_hsrr
FTR_SECTION_ELSE
@@ -1679,7 +1679,7 @@ EXC_VIRT_END(alignment, 0x4600, 0x100)
EXC_COMMON_BEGIN(alignment_common)
GEN_COMMON alignment
addi r3,r1,STACK_INT_FRAME_REGS
- bl alignment_exception
+ bl CFUNC(alignment_exception)
HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
b interrupt_return_srr
@@ -1745,7 +1745,7 @@ EXC_COMMON_BEGIN(program_check_common)
.Ldo_program_check:
addi r3,r1,STACK_INT_FRAME_REGS
- bl program_check_exception
+ bl CFUNC(program_check_exception)
HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
b interrupt_return_srr
@@ -1777,7 +1777,7 @@ EXC_COMMON_BEGIN(fp_unavailable_common)
GEN_COMMON fp_unavailable
bne 1f /* if from user, just load it up */
addi r3,r1,STACK_INT_FRAME_REGS
- bl kernel_fp_unavailable_exception
+ bl CFUNC(kernel_fp_unavailable_exception)
0: trap
EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
1:
@@ -1790,12 +1790,12 @@ BEGIN_FTR_SECTION
bne- 2f
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
- bl load_up_fpu
+ bl CFUNC(load_up_fpu)
b fast_interrupt_return_srr
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
addi r3,r1,STACK_INT_FRAME_REGS
- bl fp_unavailable_tm
+ bl CFUNC(fp_unavailable_tm)
b interrupt_return_srr
#endif
@@ -1839,7 +1839,7 @@ EXC_VIRT_END(decrementer, 0x4900, 0x80)
EXC_COMMON_BEGIN(decrementer_common)
GEN_COMMON decrementer
addi r3,r1,STACK_INT_FRAME_REGS
- bl timer_interrupt
+ bl CFUNC(timer_interrupt)
b interrupt_return_srr
@@ -1925,9 +1925,9 @@ EXC_COMMON_BEGIN(doorbell_super_common)
GEN_COMMON doorbell_super
addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_PPC_DOORBELL
- bl doorbell_exception
+ bl CFUNC(doorbell_exception)
#else
- bl unknown_async_exception
+ bl CFUNC(unknown_async_exception)
#endif
b interrupt_return_srr
@@ -2091,7 +2091,7 @@ EXC_VIRT_END(single_step, 0x4d00, 0x100)
EXC_COMMON_BEGIN(single_step_common)
GEN_COMMON single_step
addi r3,r1,STACK_INT_FRAME_REGS
- bl single_step_exception
+ bl CFUNC(single_step_exception)
b interrupt_return_srr
@@ -2126,9 +2126,9 @@ EXC_COMMON_BEGIN(h_data_storage_common)
GEN_COMMON h_data_storage
addi r3,r1,STACK_INT_FRAME_REGS
BEGIN_MMU_FTR_SECTION
- bl do_bad_page_fault_segv
+ bl CFUNC(do_bad_page_fault_segv)
MMU_FTR_SECTION_ELSE
- bl unknown_exception
+ bl CFUNC(unknown_exception)
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
b interrupt_return_hsrr
@@ -2154,7 +2154,7 @@ EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20)
EXC_COMMON_BEGIN(h_instr_storage_common)
GEN_COMMON h_instr_storage
addi r3,r1,STACK_INT_FRAME_REGS
- bl unknown_exception
+ bl CFUNC(unknown_exception)
b interrupt_return_hsrr
@@ -2177,7 +2177,7 @@ EXC_VIRT_END(emulation_assist, 0x4e40, 0x20)
EXC_COMMON_BEGIN(emulation_assist_common)
GEN_COMMON emulation_assist
addi r3,r1,STACK_INT_FRAME_REGS
- bl emulation_assist_interrupt
+ bl CFUNC(emulation_assist_interrupt)
HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
b interrupt_return_hsrr
@@ -2237,7 +2237,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
__GEN_COMMON_BODY hmi_exception_early
addi r3,r1,STACK_INT_FRAME_REGS
- bl hmi_exception_realmode
+ bl CFUNC(hmi_exception_realmode)
cmpdi cr0,r3,0
bne 1f
@@ -2255,7 +2255,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
EXC_COMMON_BEGIN(hmi_exception_common)
GEN_COMMON hmi_exception
addi r3,r1,STACK_INT_FRAME_REGS
- bl handle_hmi_exception
+ bl CFUNC(handle_hmi_exception)
b interrupt_return_hsrr
@@ -2290,9 +2290,9 @@ EXC_COMMON_BEGIN(h_doorbell_common)
GEN_COMMON h_doorbell
addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_PPC_DOORBELL
- bl doorbell_exception
+ bl CFUNC(doorbell_exception)
#else
- bl unknown_async_exception
+ bl CFUNC(unknown_async_exception)
#endif
b interrupt_return_hsrr
@@ -2325,7 +2325,7 @@ EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
EXC_COMMON_BEGIN(h_virt_irq_common)
GEN_COMMON h_virt_irq
addi r3,r1,STACK_INT_FRAME_REGS
- bl do_IRQ
+ bl CFUNC(do_IRQ)
b interrupt_return_hsrr
@@ -2374,10 +2374,10 @@ EXC_COMMON_BEGIN(performance_monitor_common)
lbz r4,PACAIRQSOFTMASK(r13)
cmpdi r4,IRQS_ENABLED
bne 1f
- bl performance_monitor_exception_async
+ bl CFUNC(performance_monitor_exception_async)
b interrupt_return_srr
1:
- bl performance_monitor_exception_nmi
+ bl CFUNC(performance_monitor_exception_nmi)
/* Clear MSR_RI before setting SRR0 and SRR1. */
li r9,0
mtmsrd r9,1
@@ -2421,19 +2421,19 @@ BEGIN_FTR_SECTION
bne- 2f
END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
#endif
- bl load_up_altivec
+ bl CFUNC(load_up_altivec)
b fast_interrupt_return_srr
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
addi r3,r1,STACK_INT_FRAME_REGS
- bl altivec_unavailable_tm
+ bl CFUNC(altivec_unavailable_tm)
b interrupt_return_srr
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
addi r3,r1,STACK_INT_FRAME_REGS
- bl altivec_unavailable_exception
+ bl CFUNC(altivec_unavailable_exception)
b interrupt_return_srr
@@ -2475,14 +2475,14 @@ BEGIN_FTR_SECTION
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
addi r3,r1,STACK_INT_FRAME_REGS
- bl vsx_unavailable_tm
+ bl CFUNC(vsx_unavailable_tm)
b interrupt_return_srr
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
addi r3,r1,STACK_INT_FRAME_REGS
- bl vsx_unavailable_exception
+ bl CFUNC(vsx_unavailable_exception)
b interrupt_return_srr
@@ -2509,7 +2509,7 @@ EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20)
EXC_COMMON_BEGIN(facility_unavailable_common)
GEN_COMMON facility_unavailable
addi r3,r1,STACK_INT_FRAME_REGS
- bl facility_unavailable_exception
+ bl CFUNC(facility_unavailable_exception)
HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
b interrupt_return_srr
@@ -2537,7 +2537,7 @@ EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20)
EXC_COMMON_BEGIN(h_facility_unavailable_common)
GEN_COMMON h_facility_unavailable
addi r3,r1,STACK_INT_FRAME_REGS
- bl facility_unavailable_exception
+ bl CFUNC(facility_unavailable_exception)
/* XXX Shouldn't be necessary in practice */
HANDLER_RESTORE_NVGPRS()
b interrupt_return_hsrr
@@ -2568,7 +2568,7 @@ EXC_VIRT_NONE(0x5200, 0x100)
EXC_COMMON_BEGIN(cbe_system_error_common)
GEN_COMMON cbe_system_error
addi r3,r1,STACK_INT_FRAME_REGS
- bl cbe_system_error_exception
+ bl CFUNC(cbe_system_error_exception)
b interrupt_return_hsrr
#else /* CONFIG_CBE_RAS */
@@ -2599,7 +2599,7 @@ EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100)
EXC_COMMON_BEGIN(instruction_breakpoint_common)
GEN_COMMON instruction_breakpoint
addi r3,r1,STACK_INT_FRAME_REGS
- bl instruction_breakpoint_exception
+ bl CFUNC(instruction_breakpoint_exception)
b interrupt_return_srr
@@ -2721,7 +2721,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
EXC_COMMON_BEGIN(denorm_exception_common)
GEN_COMMON denorm_exception
addi r3,r1,STACK_INT_FRAME_REGS
- bl unknown_exception
+ bl CFUNC(unknown_exception)
b interrupt_return_hsrr
@@ -2738,7 +2738,7 @@ EXC_VIRT_NONE(0x5600, 0x100)
EXC_COMMON_BEGIN(cbe_maintenance_common)
GEN_COMMON cbe_maintenance
addi r3,r1,STACK_INT_FRAME_REGS
- bl cbe_maintenance_exception
+ bl CFUNC(cbe_maintenance_exception)
b interrupt_return_hsrr
#else /* CONFIG_CBE_RAS */
@@ -2764,10 +2764,10 @@ EXC_COMMON_BEGIN(altivec_assist_common)
GEN_COMMON altivec_assist
addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_ALTIVEC
- bl altivec_assist_exception
+ bl CFUNC(altivec_assist_exception)
HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
#else
- bl unknown_exception
+ bl CFUNC(unknown_exception)
#endif
b interrupt_return_srr
@@ -2785,7 +2785,7 @@ EXC_VIRT_NONE(0x5800, 0x100)
EXC_COMMON_BEGIN(cbe_thermal_common)
GEN_COMMON cbe_thermal
addi r3,r1,STACK_INT_FRAME_REGS
- bl cbe_thermal_exception
+ bl CFUNC(cbe_thermal_exception)
b interrupt_return_hsrr
#else /* CONFIG_CBE_RAS */
@@ -2818,7 +2818,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)
__GEN_COMMON_BODY soft_nmi
addi r3,r1,STACK_INT_FRAME_REGS
- bl soft_nmi_interrupt
+ bl CFUNC(soft_nmi_interrupt)
/* Clear MSR_RI before setting SRR0 and SRR1. */
li r9,0
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 1febb56ebaeb..f132d8704263 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -76,6 +76,13 @@
* 2. The kernel is entered at __start
*/
+/*
+ * boot_from_prom and prom_init run at the physical address. Everything
+ * after prom and kexec entry run at the virtual address (PAGE_OFFSET).
+ * Secondaries run at the virtual address from generic_secondary_common_init
+ * onward.
+ */
+
OPEN_FIXED_SECTION(first_256B, 0x0, 0x100)
USE_FIXED_SECTION(first_256B)
/*
@@ -303,13 +310,11 @@ _GLOBAL(fsl_secondary_thread_init)
/* turn on 64-bit mode */
bl enable_64b_mode
- /* get a valid TOC pointer, wherever we're mapped at */
- bl relative_toc
- tovirt(r2,r2)
-
/* Book3E initialization */
mr r3,r24
bl book3e_secondary_thread_init
+ bl relative_toc
+
b generic_secondary_common_init
#endif /* CONFIG_PPC_BOOK3E_64 */
@@ -325,22 +330,24 @@ _GLOBAL(fsl_secondary_thread_init)
*/
_GLOBAL(generic_secondary_smp_init)
FIXUP_ENDIAN
+
+ li r13,0
+
+ /* Poison TOC */
+ li r2,-1
+
mr r24,r3
mr r25,r4
/* turn on 64-bit mode */
bl enable_64b_mode
- /* get a valid TOC pointer, wherever we're mapped at */
- bl relative_toc
- tovirt(r2,r2)
-
#ifdef CONFIG_PPC_BOOK3E_64
/* Book3E initialization */
mr r3,r24
mr r4,r25
bl book3e_secondary_core_init
-
+ /* Now NIA and r2 are relocated to PAGE_OFFSET if not already */
/*
* After common core init has finished, check if the current thread is the
* one we wanted to boot. If not, start the specified thread and stop the
@@ -378,6 +385,16 @@ _GLOBAL(generic_secondary_smp_init)
10:
b 10b
20:
+#else
+ /* Now the MMU is off, can branch to our PAGE_OFFSET address */
+ bcl 20,31,$+4
+1: mflr r11
+ addi r11,r11,(2f - 1b)
+ tovirt(r11, r11)
+ mtctr r11
+ bctr
+2:
+ bl relative_toc
#endif
generic_secondary_common_init:
@@ -492,6 +509,8 @@ SYM_FUNC_START_LOCAL(start_initialization_book3s)
/* Switch off MMU if not already off */
bl __mmu_off
+ /* Now the MMU is off, can return to our PAGE_OFFSET address */
+ tovirt(r25,r25)
mtlr r25
blr
SYM_FUNC_END(start_initialization_book3s)
@@ -515,14 +534,8 @@ __start_initialization_multiplatform:
/* Zero r13 (paca) so early program check / mce don't use it */
li r13,0
- /* Get TOC pointer (current runtime address) */
- bl relative_toc
-
- /* find out where we are now */
- bcl 20,31,$+4
-0: mflr r26 /* r26 = runtime addr here */
- addis r26,r26,(_stext - 0b)@ha
- addi r26,r26,(_stext - 0b)@l /* current runtime base addr */
+ /* Poison TOC */
+ li r2,-1
/*
* Are we booted from a PROM Of-type client-interface ?
@@ -540,16 +553,41 @@ __start_initialization_multiplatform:
mr r29,r9
#endif
+ /* Get TOC pointer (current runtime address) */
+ bl relative_toc
+
+ /* These functions return to the virtual (PAGE_OFFSET) address */
#ifdef CONFIG_PPC_BOOK3E_64
bl start_initialization_book3e
#else
bl start_initialization_book3s
#endif /* CONFIG_PPC_BOOK3E_64 */
+
+ /* Get TOC pointer, virtual */
+ bl relative_toc
+
+ /* find out where we are now */
+
+ /* OPAL doesn't pass base address in r4, have to derive it. */
+ bcl 20,31,$+4
+0: mflr r26 /* r26 = runtime addr here */
+ addis r26,r26,(_stext - 0b)@ha
+ addi r26,r26,(_stext - 0b)@l /* current runtime base addr */
+
b __after_prom_start
__REF
__boot_from_prom:
#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
+ /* Get TOC pointer, non-virtual */
+ bl relative_toc
+
+ /* find out where we are now */
+ bcl 20,31,$+4
+0: mflr r26 /* r26 = runtime addr here */
+ addis r26,r26,(_stext - 0b)@ha
+ addi r26,r26,(_stext - 0b)@l /* current runtime base addr */
+
/* Save parameters */
mr r31,r3
mr r30,r4
@@ -579,7 +617,7 @@ __boot_from_prom:
/* Do all of the interaction with OF client interface */
mr r8,r26
- bl prom_init
+ bl CFUNC(prom_init)
#endif /* #CONFIG_PPC_OF_BOOT_TRAMPOLINE */
/* We never return. We also hit that trap if trying to boot
@@ -590,18 +628,11 @@ __boot_from_prom:
__after_prom_start:
#ifdef CONFIG_RELOCATABLE
/* process relocations for the final address of the kernel */
- lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */
- sldi r25,r25,32
-#if defined(CONFIG_PPC_BOOK3E_64)
- tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */
-#endif
lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26)
-#if defined(CONFIG_PPC_BOOK3E_64)
- tophys(r26,r26)
-#endif
cmplwi cr0,r7,1 /* flagged to stay where we are ? */
- bne 1f
- add r25,r25,r26
+ mr r25,r26 /* then use current kernel base */
+ beq 1f
+ LOAD_REG_IMMEDIATE(r25, PAGE_OFFSET) /* else use static kernel base */
1: mr r3,r25
bl relocate
#if defined(CONFIG_PPC_BOOK3E_64)
@@ -617,14 +648,8 @@ __after_prom_start:
*
* Note: This process overwrites the OF exception vectors.
*/
- li r3,0 /* target addr */
-#ifdef CONFIG_PPC_BOOK3E_64
- tovirt(r3,r3) /* on booke, we already run at PAGE_OFFSET */
-#endif
+ LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET)
mr. r4,r26 /* In some cases the loader may */
-#if defined(CONFIG_PPC_BOOK3E_64)
- tovirt(r4,r4)
-#endif
beq 9f /* have already put us at zero */
li r6,0x100 /* Start offset, the first 0x100 */
/* bytes were copied earlier. */
@@ -635,9 +660,6 @@ __after_prom_start:
* variable __run_at_load, if it is set the kernel is treated as relocatable
* kernel, otherwise it will be moved to PHYSICAL_START
*/
-#if defined(CONFIG_PPC_BOOK3E_64)
- tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */
-#endif
lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26)
cmplwi cr0,r7,1
bne 3f
@@ -756,9 +778,15 @@ _GLOBAL(pmac_secondary_start)
sync
slbia
- /* get TOC pointer (real address) */
+ /* Branch to our PAGE_OFFSET address */
+ bcl 20,31,$+4
+1: mflr r11
+ addi r11,r11,(2f - 1b)
+ tovirt(r11, r11)
+ mtctr r11
+ bctr
+2:
bl relative_toc
- tovirt(r2,r2)
/* Copy some CPU settings from CPU 0 */
bl __restore_cpu_ppc970
@@ -817,7 +845,7 @@ __secondary_start:
* can turn it on below. This is a call to C, which is OK, we're still
* running on the emergency stack.
*/
- bl early_setup_secondary
+ bl CFUNC(early_setup_secondary)
/*
* The primary has initialized our kernel stack for us in the paca, grab
@@ -856,7 +884,7 @@ start_secondary_prolog:
LOAD_PACA_TOC()
li r3,0
std r3,0(r1) /* Zero the stack frame pointer */
- bl start_secondary
+ bl CFUNC(start_secondary)
b .
/*
* Reset stack pointer and call start_secondary
@@ -867,7 +895,7 @@ _GLOBAL(start_secondary_resume)
ld r1,PACAKSAVE(r13) /* Reload kernel stack pointer */
li r3,0
std r3,0(r1) /* Zero the stack frame pointer */
- bl start_secondary
+ bl CFUNC(start_secondary)
b .
#endif
@@ -897,10 +925,15 @@ SYM_FUNC_END(enable_64b_mode)
* TOC in -mcmodel=medium mode. After we relocate to 0 but before
* the MMU is on we need our TOC to be a virtual address otherwise
* these pointers will be real addresses which may get stored and
- * accessed later with the MMU on. We use tovirt() at the call
- * sites to handle this.
+ * accessed later with the MMU on. We branch to the virtual address
+ * while still in real mode then call relative_toc again to handle
+ * this.
*/
_GLOBAL(relative_toc)
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ tdnei r2,-1
+ blr
+#else
mflr r0
bcl 20,31,$+4
0: mflr r11
@@ -911,15 +944,15 @@ _GLOBAL(relative_toc)
.balign 8
p_toc: .8byte .TOC. - 0b
+#endif
/*
* This is where the main kernel code starts.
*/
__REF
start_here_multiplatform:
- /* set up the TOC */
- bl relative_toc
- tovirt(r2,r2)
+ /* Adjust TOC for moved kernel. Could adjust when moving it instead. */
+ bl relative_toc
/* Clear out the BSS. It may have been done in prom_init,
* already but that's irrelevant since prom_init will soon
@@ -972,7 +1005,7 @@ start_here_multiplatform:
*/
#ifdef CONFIG_KASAN
- bl kasan_early_init
+ bl CFUNC(kasan_early_init)
#endif
/* Restore parameters passed from prom_init/kexec */
mr r3,r31
@@ -1005,7 +1038,7 @@ start_here_common:
stb r0,PACAIRQHAPPENED(r13)
/* Generic kernel entry */
- bl start_kernel
+ bl CFUNC(start_kernel)
/* Not reached */
0: trap
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 37d43c172676..b6b5b01a173c 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -5,6 +5,7 @@
#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
#include <asm/kvm_asm.h>
#include <asm/kvm_booke_hv_asm.h>
+#include <asm/thread_info.h> /* for THREAD_SHIFT */
#ifdef __ASSEMBLY__
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index b9a725abc596..b1c0418b25c8 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -107,19 +107,11 @@ static struct ctl_table powersave_nap_ctl_table[] = {
},
{}
};
-static struct ctl_table powersave_nap_sysctl_root[] = {
- {
- .procname = "kernel",
- .mode = 0555,
- .child = powersave_nap_ctl_table,
- },
- {}
-};
static int __init
register_powersave_nap_sysctl(void)
{
- register_sysctl_table(powersave_nap_sysctl_root);
+ register_sysctl("kernel", powersave_nap_ctl_table);
return 0;
}
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 0ec1581619db..e34c72285b4e 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -95,7 +95,7 @@ static notrace void booke_load_dbcr0(void)
#endif
}
-static void check_return_regs_valid(struct pt_regs *regs)
+static notrace void check_return_regs_valid(struct pt_regs *regs)
{
#ifdef CONFIG_PPC_BOOK3S_64
unsigned long trap, srr0, srr1;
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index fccc34489add..bd863702d812 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -101,12 +101,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
* state of kernel code.
*/
SANITIZE_SYSCALL_GPRS()
- bl system_call_exception
+ bl CFUNC(system_call_exception)
.Lsyscall_vectored_\name\()_exit:
addi r4,r1,STACK_INT_FRAME_REGS
li r5,1 /* scv */
- bl syscall_exit_prepare
+ bl CFUNC(syscall_exit_prepare)
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
.Lsyscall_vectored_\name\()_rst_start:
lbz r11,PACAIRQHAPPENED(r13)
@@ -185,7 +185,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart)
addi r4,r1,STACK_INT_FRAME_REGS
li r11,IRQS_ALL_DISABLED
stb r11,PACAIRQSOFTMASK(r13)
- bl syscall_exit_restart
+ bl CFUNC(syscall_exit_restart)
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
b .Lsyscall_vectored_\name\()_rst_start
1:
@@ -286,12 +286,12 @@ END_BTB_FLUSH_SECTION
* state of kernel code.
*/
SANITIZE_SYSCALL_GPRS()
- bl system_call_exception
+ bl CFUNC(system_call_exception)
.Lsyscall_exit:
addi r4,r1,STACK_INT_FRAME_REGS
li r5,0 /* !scv */
- bl syscall_exit_prepare
+ bl CFUNC(syscall_exit_prepare)
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
#ifdef CONFIG_PPC_BOOK3S
.Lsyscall_rst_start:
@@ -372,7 +372,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_restart)
addi r4,r1,STACK_INT_FRAME_REGS
li r11,IRQS_ALL_DISABLED
stb r11,PACAIRQSOFTMASK(r13)
- bl syscall_exit_restart
+ bl CFUNC(syscall_exit_restart)
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
b .Lsyscall_rst_start
1:
@@ -401,7 +401,7 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr)
li r3,0 /* 0 return value, no EMULATE_STACK_STORE */
bne+ .Lfast_kernel_interrupt_return_srr
addi r3,r1,STACK_INT_FRAME_REGS
- bl unrecoverable_exception
+ bl CFUNC(unrecoverable_exception)
b . /* should not get here */
#else
bne .Lfast_user_interrupt_return_srr
@@ -419,7 +419,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\())
interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */
_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
addi r3,r1,STACK_INT_FRAME_REGS
- bl interrupt_exit_user_prepare
+ bl CFUNC(interrupt_exit_user_prepare)
#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS
cmpdi r3,0
bne- .Lrestore_nvgprs_\srr
@@ -523,7 +523,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart)
addi r3,r1,STACK_INT_FRAME_REGS
li r11,IRQS_ALL_DISABLED
stb r11,PACAIRQSOFTMASK(r13)
- bl interrupt_exit_user_restart
+ bl CFUNC(interrupt_exit_user_restart)
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
b .Linterrupt_return_\srr\()_user_rst_start
1:
@@ -536,7 +536,7 @@ RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr
interrupt_return_\srr\()_kernel:
_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
addi r3,r1,STACK_INT_FRAME_REGS
- bl interrupt_exit_kernel_prepare
+ bl CFUNC(interrupt_exit_kernel_prepare)
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
.Linterrupt_return_\srr\()_kernel_rst_start:
@@ -705,7 +705,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart)
addi r3,r1,STACK_INT_FRAME_REGS
li r11,IRQS_ALL_DISABLED
stb r11,PACAIRQSOFTMASK(r13)
- bl interrupt_exit_kernel_restart
+ bl CFUNC(interrupt_exit_kernel_restart)
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
b .Linterrupt_return_\srr\()_kernel_rst_start
1:
@@ -727,21 +727,20 @@ DEFINE_FIXED_SYMBOL(__end_soft_masked, text)
#ifdef CONFIG_PPC_BOOK3S
_GLOBAL(ret_from_fork_scv)
- bl schedule_tail
- REST_NVGPRS(r1)
+ bl CFUNC(schedule_tail)
+ HANDLER_RESTORE_NVGPRS()
li r3,0 /* fork() return value */
b .Lsyscall_vectored_common_exit
#endif
_GLOBAL(ret_from_fork)
- bl schedule_tail
- REST_NVGPRS(r1)
+ bl CFUNC(schedule_tail)
+ HANDLER_RESTORE_NVGPRS()
li r3,0 /* fork() return value */
b .Lsyscall_exit
-_GLOBAL(ret_from_kernel_thread)
- bl schedule_tail
- REST_NVGPRS(r1)
+_GLOBAL(ret_from_kernel_user_thread)
+ bl CFUNC(schedule_tail)
mtctr r14
mr r3,r15
#ifdef CONFIG_PPC64_ELF_ABI_V2
@@ -749,4 +748,25 @@ _GLOBAL(ret_from_kernel_thread)
#endif
bctrl
li r3,0
+ /*
+ * It does not matter whether this returns via the scv or sc path
+ * because it returns as execve() and therefore has no calling ABI
+ * (i.e., it sets registers according to the exec()ed entry point).
+ */
b .Lsyscall_exit
+
+_GLOBAL(start_kernel_thread)
+ bl CFUNC(schedule_tail)
+ mtctr r14
+ mr r3,r15
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+ mr r12,r14
+#endif
+ bctrl
+ /*
+ * This must not return. We actually want to BUG here, not WARN,
+ * because BUG will exit the process which is what the kernel thread
+ * should have done, which may give some hope of continuing.
+ */
+100: trap
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index ee95937bdaf1..0089dd49b4cb 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -35,6 +35,7 @@
#include <asm/vio.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>
+#include <asm/ppc-pci.h>
#define DBG(...)
@@ -1086,7 +1087,7 @@ void iommu_tce_kill(struct iommu_table *tbl,
}
EXPORT_SYMBOL_GPL(iommu_tce_kill);
-int iommu_take_ownership(struct iommu_table *tbl)
+static int iommu_take_ownership(struct iommu_table *tbl)
{
unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
int ret = 0;
@@ -1118,9 +1119,8 @@ int iommu_take_ownership(struct iommu_table *tbl)
return ret;
}
-EXPORT_SYMBOL_GPL(iommu_take_ownership);
-void iommu_release_ownership(struct iommu_table *tbl)
+static void iommu_release_ownership(struct iommu_table *tbl)
{
unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
@@ -1137,7 +1137,6 @@ void iommu_release_ownership(struct iommu_table *tbl)
spin_unlock(&tbl->pools[i].lock);
spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
}
-EXPORT_SYMBOL_GPL(iommu_release_ownership);
int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
{
@@ -1158,8 +1157,14 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
pr_debug("%s: Adding %s to iommu group %d\n",
__func__, dev_name(dev), iommu_group_id(table_group->group));
-
- return iommu_group_add_device(table_group->group, dev);
+ /*
+ * This is still not adding devices via the IOMMU bus notifier because
+ * of pcibios_init() from arch/powerpc/kernel/pci_64.c which calls
+ * pcibios_scan_phb() first (and this guy adds devices and triggers
+ * the notifier) and only then it calls pci_bus_add_devices() which
+ * configures DMA for buses which also creates PEs and IOMMU groups.
+ */
+ return iommu_probe_device(dev);
}
EXPORT_SYMBOL_GPL(iommu_add_device);
@@ -1179,4 +1184,233 @@ void iommu_del_device(struct device *dev)
iommu_group_remove_device(dev);
}
EXPORT_SYMBOL_GPL(iommu_del_device);
+
+/*
+ * A simple iommu_table_group_ops which only allows reusing the existing
+ * iommu_table. This handles VFIO for POWER7 or the nested KVM.
+ * The ops does not allow creating windows and only allows reusing the existing
+ * one if it matches table_group->tce32_start/tce32_size/page_shift.
+ */
+static unsigned long spapr_tce_get_table_size(__u32 page_shift,
+ __u64 window_size, __u32 levels)
+{
+ unsigned long size;
+
+ if (levels > 1)
+ return ~0U;
+ size = window_size >> (page_shift - 3);
+ return size;
+}
+
+static long spapr_tce_create_table(struct iommu_table_group *table_group, int num,
+ __u32 page_shift, __u64 window_size, __u32 levels,
+ struct iommu_table **ptbl)
+{
+ struct iommu_table *tbl = table_group->tables[0];
+
+ if (num > 0)
+ return -EPERM;
+
+ if (tbl->it_page_shift != page_shift ||
+ tbl->it_size != (window_size >> page_shift) ||
+ tbl->it_indirect_levels != levels - 1)
+ return -EINVAL;
+
+ *ptbl = iommu_tce_table_get(tbl);
+ return 0;
+}
+
+static long spapr_tce_set_window(struct iommu_table_group *table_group,
+ int num, struct iommu_table *tbl)
+{
+ return tbl == table_group->tables[num] ? 0 : -EPERM;
+}
+
+static long spapr_tce_unset_window(struct iommu_table_group *table_group, int num)
+{
+ return 0;
+}
+
+static long spapr_tce_take_ownership(struct iommu_table_group *table_group)
+{
+ int i, j, rc = 0;
+
+ for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+ struct iommu_table *tbl = table_group->tables[i];
+
+ if (!tbl || !tbl->it_map)
+ continue;
+
+ rc = iommu_take_ownership(tbl);
+ if (!rc)
+ continue;
+
+ for (j = 0; j < i; ++j)
+ iommu_release_ownership(table_group->tables[j]);
+ return rc;
+ }
+ return 0;
+}
+
+static void spapr_tce_release_ownership(struct iommu_table_group *table_group)
+{
+ int i;
+
+ for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+ struct iommu_table *tbl = table_group->tables[i];
+
+ if (!tbl)
+ continue;
+
+ iommu_table_clear(tbl);
+ if (tbl->it_map)
+ iommu_release_ownership(tbl);
+ }
+}
+
+struct iommu_table_group_ops spapr_tce_table_group_ops = {
+ .get_table_size = spapr_tce_get_table_size,
+ .create_table = spapr_tce_create_table,
+ .set_window = spapr_tce_set_window,
+ .unset_window = spapr_tce_unset_window,
+ .take_ownership = spapr_tce_take_ownership,
+ .release_ownership = spapr_tce_release_ownership,
+};
+
+/*
+ * A simple iommu_ops to allow less cruft in generic VFIO code.
+ */
+static int spapr_tce_blocking_iommu_attach_dev(struct iommu_domain *dom,
+ struct device *dev)
+{
+ struct iommu_group *grp = iommu_group_get(dev);
+ struct iommu_table_group *table_group;
+ int ret = -EINVAL;
+
+ if (!grp)
+ return -ENODEV;
+
+ table_group = iommu_group_get_iommudata(grp);
+ ret = table_group->ops->take_ownership(table_group);
+ iommu_group_put(grp);
+
+ return ret;
+}
+
+static void spapr_tce_blocking_iommu_set_platform_dma(struct device *dev)
+{
+ struct iommu_group *grp = iommu_group_get(dev);
+ struct iommu_table_group *table_group;
+
+ table_group = iommu_group_get_iommudata(grp);
+ table_group->ops->release_ownership(table_group);
+}
+
+static const struct iommu_domain_ops spapr_tce_blocking_domain_ops = {
+ .attach_dev = spapr_tce_blocking_iommu_attach_dev,
+};
+
+static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap)
+{
+ switch (cap) {
+ case IOMMU_CAP_CACHE_COHERENCY:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type)
+{
+ struct iommu_domain *dom;
+
+ if (type != IOMMU_DOMAIN_BLOCKED)
+ return NULL;
+
+ dom = kzalloc(sizeof(*dom), GFP_KERNEL);
+ if (!dom)
+ return NULL;
+
+ dom->ops = &spapr_tce_blocking_domain_ops;
+
+ return dom;
+}
+
+static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev)
+{
+ struct pci_dev *pdev;
+ struct pci_controller *hose;
+
+ if (!dev_is_pci(dev))
+ return ERR_PTR(-EPERM);
+
+ pdev = to_pci_dev(dev);
+ hose = pdev->bus->sysdata;
+
+ return &hose->iommu;
+}
+
+static void spapr_tce_iommu_release_device(struct device *dev)
+{
+}
+
+static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev)
+{
+ struct pci_controller *hose;
+ struct pci_dev *pdev;
+
+ pdev = to_pci_dev(dev);
+ hose = pdev->bus->sysdata;
+
+ if (!hose->controller_ops.device_group)
+ return ERR_PTR(-ENOENT);
+
+ return hose->controller_ops.device_group(hose, pdev);
+}
+
+static const struct iommu_ops spapr_tce_iommu_ops = {
+ .capable = spapr_tce_iommu_capable,
+ .domain_alloc = spapr_tce_iommu_domain_alloc,
+ .probe_device = spapr_tce_iommu_probe_device,
+ .release_device = spapr_tce_iommu_release_device,
+ .device_group = spapr_tce_iommu_device_group,
+ .set_platform_dma_ops = spapr_tce_blocking_iommu_set_platform_dma,
+};
+
+static struct attribute *spapr_tce_iommu_attrs[] = {
+ NULL,
+};
+
+static struct attribute_group spapr_tce_iommu_group = {
+ .name = "spapr-tce-iommu",
+ .attrs = spapr_tce_iommu_attrs,
+};
+
+static const struct attribute_group *spapr_tce_iommu_groups[] = {
+ &spapr_tce_iommu_group,
+ NULL,
+};
+
+/*
+ * This registers IOMMU devices of PHBs. This needs to happen
+ * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and
+ * before subsys_initcall(iommu_subsys_init).
+ */
+static int __init spapr_tce_setup_phb_iommus_initcall(void)
+{
+ struct pci_controller *hose;
+
+ list_for_each_entry(hose, &hose_list, list_node) {
+ iommu_device_sysfs_add(&hose->iommu, hose->parent,
+ spapr_tce_iommu_groups, "iommu-phb%04x",
+ hose->global_number);
+ iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops,
+ hose->parent);
+ }
+ return 0;
+}
+postcore_initcall_sync(spapr_tce_setup_phb_iommus_initcall);
+
#endif /* CONFIG_IOMMU_API */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index c9535f2760b5..6f7d4edaa0bc 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -206,7 +206,11 @@ static __always_inline void call_do_softirq(const void *sp)
asm volatile (
PPC_STLU " %%r1, %[offset](%[sp]) ;"
"mr %%r1, %[sp] ;"
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ "bl %[callee]@notoc ;"
+#else
"bl %[callee] ;"
+#endif
PPC_LL " %%r1, 0(%%r1) ;"
: // Outputs
: // Inputs
@@ -259,7 +263,11 @@ static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
PPC_STLU " %%r1, %[offset](%[sp]) ;"
"mr %%r4, %%r1 ;"
"mr %%r1, %[sp] ;"
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ "bl %[callee]@notoc ;"
+#else
"bl %[callee] ;"
+#endif
PPC_LL " %%r1, 0(%%r1) ;"
: // Outputs
"+r" (r3)
diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c
index c788c55512ed..938e66829eae 100644
--- a/arch/powerpc/kernel/irq_64.c
+++ b/arch/powerpc/kernel/irq_64.c
@@ -348,13 +348,12 @@ EXPORT_SYMBOL(arch_local_irq_restore);
* already the case when ppc_md.power_save is called). The function
* will return whether to enter power save or just return.
*
- * In the former case, it will have notified lockdep of interrupts
- * being re-enabled and generally sanitized the lazy irq state,
- * and in the latter case it will leave with interrupts hard
+ * In the former case, it will have generally sanitized the lazy irq
+ * state, and in the latter case it will leave with interrupts hard
* disabled and marked as such, so the local_irq_enable() call
* in arch_cpu_idle() will properly re-enable everything.
*/
-bool prep_irq_for_idle(void)
+__cpuidle bool prep_irq_for_idle(void)
{
/*
* First we need to hard disable to ensure no interrupt
@@ -370,9 +369,6 @@ bool prep_irq_for_idle(void)
if (lazy_irq_pending())
return false;
- /* Tell lockdep we are about to re-enable */
- trace_hardirqs_on();
-
/*
* Mark interrupts as soft-enabled and clear the
* PACA_IRQ_HARD_DIS from the pending mask since we
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index dc746611ebc0..85bdd7d3652f 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -55,80 +55,49 @@ static void remap_isa_base(phys_addr_t pa, unsigned long size)
}
}
-static void pci_process_ISA_OF_ranges(struct device_node *isa_node,
- unsigned long phb_io_base_phys)
+static int process_ISA_OF_ranges(struct device_node *isa_node,
+ unsigned long phb_io_base_phys)
{
- /* We should get some saner parsing here and remove these structs */
- struct pci_address {
- u32 a_hi;
- u32 a_mid;
- u32 a_lo;
- };
-
- struct isa_address {
- u32 a_hi;
- u32 a_lo;
- };
-
- struct isa_range {
- struct isa_address isa_addr;
- struct pci_address pci_addr;
- unsigned int size;
- };
-
- const struct isa_range *range;
- unsigned long pci_addr;
- unsigned int isa_addr;
unsigned int size;
- int rlen = 0;
+ struct of_range_parser parser;
+ struct of_range range;
- range = of_get_property(isa_node, "ranges", &rlen);
- if (range == NULL || (rlen < sizeof(struct isa_range)))
+ if (of_range_parser_init(&parser, isa_node))
goto inval_range;
- /* From "ISA Binding to 1275"
- * The ranges property is laid out as an array of elements,
- * each of which comprises:
- * cells 0 - 1: an ISA address
- * cells 2 - 4: a PCI address
- * (size depending on dev->n_addr_cells)
- * cell 5: the size of the range
- */
- if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) {
- range++;
- rlen -= sizeof(struct isa_range);
- if (rlen < sizeof(struct isa_range))
- goto inval_range;
- }
- if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO)
- goto inval_range;
+ for_each_of_range(&parser, &range) {
+ if ((range.flags & ISA_SPACE_MASK) != ISA_SPACE_IO)
+ continue;
- isa_addr = range->isa_addr.a_lo;
- pci_addr = (unsigned long) range->pci_addr.a_mid << 32 |
- range->pci_addr.a_lo;
+ if (range.cpu_addr == OF_BAD_ADDR) {
+ pr_err("ISA: Bad CPU mapping: %s\n", __func__);
+ return -EINVAL;
+ }
- /* Assume these are both zero. Note: We could fix that and
- * do a proper parsing instead ... oh well, that will do for
- * now as nobody uses fancy mappings for ISA bridges
- */
- if ((pci_addr != 0) || (isa_addr != 0)) {
- printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
- __func__);
- return;
- }
+ /* We need page alignment */
+ if ((range.bus_addr & ~PAGE_MASK) || (range.cpu_addr & ~PAGE_MASK)) {
+ pr_warn("ISA: bridge %pOF has non aligned IO range\n", isa_node);
+ return -EINVAL;
+ }
- /* Align size and make sure it's cropped to 64K */
- size = PAGE_ALIGN(range->size);
- if (size > 0x10000)
- size = 0x10000;
+ /* Align size and make sure it's cropped to 64K */
+ size = PAGE_ALIGN(range.size);
+ if (size > 0x10000)
+ size = 0x10000;
- remap_isa_base(phb_io_base_phys, size);
- return;
+ if (!phb_io_base_phys)
+ phb_io_base_phys = range.cpu_addr;
+
+ remap_isa_base(phb_io_base_phys, size);
+ return 0;
+ }
inval_range:
- printk(KERN_ERR "no ISA IO ranges or unexpected isa range, "
- "mapping 64k\n");
- remap_isa_base(phb_io_base_phys, 0x10000);
+ if (!phb_io_base_phys) {
+ pr_err("no ISA IO ranges or unexpected isa range, mapping 64k\n");
+ remap_isa_base(phb_io_base_phys, 0x10000);
+ }
+ return 0;
}
@@ -170,7 +139,7 @@ void __init isa_bridge_find_early(struct pci_controller *hose)
isa_bridge_devnode = np;
/* Now parse the "ranges" property and setup the ISA mapping */
- pci_process_ISA_OF_ranges(np, hose->io_base_phys);
+ process_ISA_OF_ranges(np, hose->io_base_phys);
/* Set the global ISA io base to indicate we have an ISA bridge */
isa_io_base = ISA_IO_BASE;
@@ -186,75 +155,15 @@ void __init isa_bridge_find_early(struct pci_controller *hose)
*/
void __init isa_bridge_init_non_pci(struct device_node *np)
{
- const __be32 *ranges, *pbasep = NULL;
- int rlen, i, rs;
- u32 na, ns, pna;
- u64 cbase, pbase, size = 0;
+ int ret;
/* If we already have an ISA bridge, bail off */
if (isa_bridge_devnode != NULL)
return;
- pna = of_n_addr_cells(np);
- if (of_property_read_u32(np, "#address-cells", &na) ||
- of_property_read_u32(np, "#size-cells", &ns)) {
- pr_warn("ISA: Non-PCI bridge %pOF is missing address format\n",
- np);
- return;
- }
-
- /* Check it's a supported address format */
- if (na != 2 || ns != 1) {
- pr_warn("ISA: Non-PCI bridge %pOF has unsupported address format\n",
- np);
- return;
- }
- rs = na + ns + pna;
-
- /* Grab the ranges property */
- ranges = of_get_property(np, "ranges", &rlen);
- if (ranges == NULL || rlen < rs) {
- pr_warn("ISA: Non-PCI bridge %pOF has absent or invalid ranges\n",
- np);
- return;
- }
-
- /* Parse it. We are only looking for IO space */
- for (i = 0; (i + rs - 1) < rlen; i += rs) {
- if (be32_to_cpup(ranges + i) != 1)
- continue;
- cbase = be32_to_cpup(ranges + i + 1);
- size = of_read_number(ranges + i + na + pna, ns);
- pbasep = ranges + i + na;
- break;
- }
-
- /* Got something ? */
- if (!size || !pbasep) {
- pr_warn("ISA: Non-PCI bridge %pOF has no usable IO range\n",
- np);
+ ret = process_ISA_OF_ranges(np, 0);
+ if (ret)
return;
- }
-
- /* Align size and make sure it's cropped to 64K */
- size = PAGE_ALIGN(size);
- if (size > 0x10000)
- size = 0x10000;
-
- /* Map pbase */
- pbase = of_translate_address(np, pbasep);
- if (pbase == OF_BAD_ADDR) {
- pr_warn("ISA: Non-PCI bridge %pOF failed to translate IO base\n",
- np);
- return;
- }
-
- /* We need page alignment */
- if ((cbase & ~PAGE_MASK) || (pbase & ~PAGE_MASK)) {
- pr_warn("ISA: Non-PCI bridge %pOF has non aligned IO range\n",
- np);
- return;
- }
/* Got it */
isa_bridge_devnode = np;
@@ -263,7 +172,6 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
* and map it
*/
isa_io_base = ISA_IO_BASE;
- remap_isa_base(pbase, size);
pr_debug("ISA: Non-PCI bridge is %pOF\n", np);
}
@@ -282,7 +190,7 @@ static void isa_bridge_find_late(struct pci_dev *pdev,
isa_bridge_pcidev = pdev;
/* Now parse the "ranges" property and setup the ISA mapping */
- pci_process_ISA_OF_ranges(devnode, hose->io_base_phys);
+ process_ISA_OF_ranges(devnode, hose->io_base_phys);
/* Set the global ISA io base to indicate we have an ISA bridge */
isa_io_base = ISA_IO_BASE;
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index f048c424c525..c9ad12461d44 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -171,15 +171,15 @@ static int __init add_legacy_soc_port(struct device_node *np,
/* We only support ports that have a clock frequency properly
* encoded in the device-tree.
*/
- if (of_get_property(np, "clock-frequency", NULL) == NULL)
+ if (!of_property_present(np, "clock-frequency"))
return -1;
/* if reg-offset don't try to use it */
- if ((of_get_property(np, "reg-offset", NULL) != NULL))
+ if (of_property_present(np, "reg-offset"))
return -1;
/* if rtas uses this device, don't try to use it as well */
- if (of_get_property(np, "used-by-rtas", NULL) != NULL)
+ if (of_property_read_bool(np, "used-by-rtas"))
return -1;
/* Get the address */
@@ -237,7 +237,7 @@ static int __init add_legacy_isa_port(struct device_node *np,
* Note: Don't even try on P8 lpc, we know it's not directly mapped
*/
if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc") ||
- of_get_property(isa_brg, "ranges", NULL)) {
+ of_property_present(isa_brg, "ranges")) {
taddr = of_translate_address(np, reg);
if (taddr == OF_BAD_ADDR)
taddr = 0;
@@ -268,7 +268,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
* compatible UARTs on PCI need all sort of quirks (port offsets
* etc...) that this code doesn't know about
*/
- if (of_get_property(np, "clock-frequency", NULL) == NULL)
+ if (!of_property_present(np, "clock-frequency"))
return -1;
/* Get the PCI address. Assume BAR 0 */
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index c39c07a4c06e..2c9ac70aaf0c 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -432,7 +432,7 @@ _GLOBAL(kexec_sequence)
1:
/* copy dest pages, flush whole dest image */
mr r3,r29
- bl kexec_copy_flush /* (image) */
+ bl CFUNC(kexec_copy_flush) /* (image) */
/* turn off mmu now if not done earlier */
cmpdi r26,0
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 2ac78d207f77..92570289ce08 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -101,32 +101,45 @@ static unsigned long stub_func_addr(func_desc_t func)
/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
the kernel itself). But on PPC64, these need to be used for every
jump, actually, to reset r2 (TOC+0x8000). */
-struct ppc64_stub_entry
-{
- /* 28 byte jump instruction sequence (7 instructions). We only
- * need 6 instructions on ABIv2 but we always allocate 7 so
- * so we don't have to modify the trampoline load instruction. */
+struct ppc64_stub_entry {
+ /*
+ * 28 byte jump instruction sequence (7 instructions) that can
+ * hold ppc64_stub_insns or stub_insns. Must be 8-byte aligned
+ * with PCREL kernels that use prefix instructions in the stub.
+ */
u32 jump[7];
/* Used by ftrace to identify stubs */
u32 magic;
/* Data for the above code */
func_desc_t funcdata;
+} __aligned(8);
+
+struct ppc64_got_entry {
+ u64 addr;
};
/*
* PPC64 uses 24 bit jumps, but we need to jump into other modules or
* the kernel which may be further. So we jump to a stub.
*
- * For ELFv1 we need to use this to set up the new r2 value (aka TOC
- * pointer). For ELFv2 it's the callee's responsibility to set up the
- * new r2, but for both we need to save the old r2.
+ * Target address and TOC are loaded from function descriptor in the
+ * ppc64_stub_entry.
+ *
+ * r12 is used to generate the target address, which is required for the
+ * ELFv2 global entry point calling convention.
*
- * We could simply patch the new r2 value and function pointer into
- * the stub, but it's significantly shorter to put these values at the
- * end of the stub code, and patch the stub address (32-bits relative
- * to the TOC ptr, r2) into the stub.
+ * TOC handling:
+ * - PCREL does not have a TOC.
+ * - ELFv2 non-PCREL just has to save r2, the callee is responsible for
+ * setting its own TOC pointer at the global entry address.
+ * - ELFv1 must load the new TOC pointer from the function descriptor.
*/
static u32 ppc64_stub_insns[] = {
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ /* pld r12,addr */
+ PPC_PREFIX_8LS | __PPC_PRFX_R(1),
+ PPC_INST_PLD | ___PPC_RT(_R12),
+#else
PPC_RAW_ADDIS(_R11, _R2, 0),
PPC_RAW_ADDI(_R11, _R11, 0),
/* Save current r2 value in magic place on the stack. */
@@ -136,13 +149,17 @@ static u32 ppc64_stub_insns[] = {
/* Set up new r2 from function descriptor */
PPC_RAW_LD(_R2, _R11, 40),
#endif
+#endif
PPC_RAW_MTCTR(_R12),
PPC_RAW_BCTR(),
};
-/* Count how many different 24-bit relocations (different symbol,
- different addend) */
-static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
+/*
+ * Count how many different r_type relocations (different symbol,
+ * different addend).
+ */
+static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num,
+ unsigned long r_type)
{
unsigned int i, r_info, r_addend, _count_relocs;
@@ -151,8 +168,8 @@ static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
r_info = 0;
r_addend = 0;
for (i = 0; i < num; i++)
- /* Only count 24-bit relocs, others don't need stubs */
- if (ELF64_R_TYPE(rela[i].r_info) == R_PPC_REL24 &&
+ /* Only count r_type relocs, others don't need stubs */
+ if (ELF64_R_TYPE(rela[i].r_info) == r_type &&
(r_info != ELF64_R_SYM(rela[i].r_info) ||
r_addend != rela[i].r_addend)) {
_count_relocs++;
@@ -213,7 +230,14 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
relocs += count_relocs((void *)sechdrs[i].sh_addr,
sechdrs[i].sh_size
- / sizeof(Elf64_Rela));
+ / sizeof(Elf64_Rela),
+ R_PPC_REL24);
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ relocs += count_relocs((void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size
+ / sizeof(Elf64_Rela),
+ R_PPC64_REL24_NOTOC);
+#endif
}
}
@@ -230,6 +254,95 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
return relocs * sizeof(struct ppc64_stub_entry);
}
+#ifdef CONFIG_PPC_KERNEL_PCREL
+static int count_pcpu_relocs(const Elf64_Shdr *sechdrs,
+ const Elf64_Rela *rela, unsigned int num,
+ unsigned int symindex, unsigned int pcpu)
+{
+ unsigned int i, r_info, r_addend, _count_relocs;
+
+ _count_relocs = 0;
+ r_info = 0;
+ r_addend = 0;
+
+ for (i = 0; i < num; i++) {
+ Elf64_Sym *sym;
+
+ /* This is the symbol it is referring to */
+ sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+ + ELF64_R_SYM(rela[i].r_info);
+
+ if (sym->st_shndx == pcpu &&
+ (r_info != ELF64_R_SYM(rela[i].r_info) ||
+ r_addend != rela[i].r_addend)) {
+ _count_relocs++;
+ r_info = ELF64_R_SYM(rela[i].r_info);
+ r_addend = rela[i].r_addend;
+ }
+ }
+
+ return _count_relocs;
+}
+
+/* Get size of potential GOT required. */
+static unsigned long get_got_size(const Elf64_Ehdr *hdr,
+ const Elf64_Shdr *sechdrs,
+ struct module *me)
+{
+ /* One extra reloc so it's always 0-addr terminated */
+ unsigned long relocs = 1;
+ unsigned int i, symindex = 0;
+
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type == SHT_SYMTAB) {
+ symindex = i;
+ break;
+ }
+ }
+ WARN_ON_ONCE(!symindex);
+
+ /* Every relocated section... */
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type == SHT_RELA) {
+ pr_debug("Found relocations in section %u\n", i);
+ pr_debug("Ptr: %p. Number: %llu\n", (void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size / sizeof(Elf64_Rela));
+
+ /*
+ * Sort the relocation information based on a symbol and
+ * addend key. This is a stable O(n*log n) complexity
+ * algorithm but it will reduce the complexity of
+ * count_relocs() to linear complexity O(n)
+ */
+ sort((void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size / sizeof(Elf64_Rela),
+ sizeof(Elf64_Rela), relacmp, NULL);
+
+ relocs += count_relocs((void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size
+ / sizeof(Elf64_Rela),
+ R_PPC64_GOT_PCREL34);
+
+ /*
+ * Percpu data access typically gets linked with
+ * REL34 relocations, but the percpu section gets
+ * moved at load time and requires that to be
+ * converted to GOT linkage.
+ */
+ if (IS_ENABLED(CONFIG_SMP) && symindex)
+ relocs += count_pcpu_relocs(sechdrs,
+ (void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size
+ / sizeof(Elf64_Rela),
+ symindex, me->arch.pcpu_section);
+ }
+ }
+
+ pr_debug("Looks like a total of %lu GOT entries, max\n", relocs);
+ return relocs * sizeof(struct ppc64_got_entry);
+}
+#else /* CONFIG_PPC_KERNEL_PCREL */
+
/* Still needed for ELFv2, for .TOC. */
static void dedotify_versions(struct modversion_info *vers,
unsigned long size)
@@ -279,6 +392,7 @@ static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs,
}
return NULL;
}
+#endif /* CONFIG_PPC_KERNEL_PCREL */
bool module_init_section(const char *name)
{
@@ -297,6 +411,15 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
for (i = 1; i < hdr->e_shnum; i++) {
if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
me->arch.stubs_section = i;
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ else if (strcmp(secstrings + sechdrs[i].sh_name, ".data..percpu") == 0)
+ me->arch.pcpu_section = i;
+ else if (strcmp(secstrings + sechdrs[i].sh_name, ".mygot") == 0) {
+ me->arch.got_section = i;
+ if (sechdrs[i].sh_addralign < 8)
+ sechdrs[i].sh_addralign = 8;
+ }
+#else
else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) {
me->arch.toc_section = i;
if (sechdrs[i].sh_addralign < 8)
@@ -311,6 +434,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
sechdrs[i].sh_size / sizeof(Elf64_Sym),
(void *)hdr
+ sechdrs[sechdrs[i].sh_link].sh_offset);
+#endif
}
if (!me->arch.stubs_section) {
@@ -318,26 +442,47 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
return -ENOEXEC;
}
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ if (!me->arch.got_section) {
+ pr_err("%s: doesn't contain .mygot.\n", me->name);
+ return -ENOEXEC;
+ }
+
+ /* Override the got size */
+ sechdrs[me->arch.got_section].sh_size = get_got_size(hdr, sechdrs, me);
+#else
/* If we don't have a .toc, just use .stubs. We need to set r2
to some reasonable value in case the module calls out to
other functions via a stub, or if a function pointer escapes
the module by some means. */
if (!me->arch.toc_section)
me->arch.toc_section = me->arch.stubs_section;
+#endif
/* Override the stubs size */
sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs);
+
return 0;
}
#ifdef CONFIG_MPROFILE_KERNEL
static u32 stub_insns[] = {
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase)),
+ PPC_RAW_NOP(), /* align the prefix insn */
+ /* paddi r12,r12,addr */
+ PPC_PREFIX_MLS | __PPC_PRFX_R(0),
+ PPC_INST_PADDI | ___PPC_RT(_R12) | ___PPC_RA(_R12),
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR(),
+#else
PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)),
PPC_RAW_ADDIS(_R12, _R12, 0),
PPC_RAW_ADDI(_R12, _R12, 0),
PPC_RAW_MTCTR(_R12),
PPC_RAW_BCTR(),
+#endif
};
/*
@@ -358,18 +503,37 @@ static inline int create_ftrace_stub(struct ppc64_stub_entry *entry,
{
long reladdr;
- memcpy(entry->jump, stub_insns, sizeof(stub_insns));
-
- /* Stub uses address relative to kernel toc (from the paca) */
- reladdr = addr - kernel_toc_addr();
- if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
- pr_err("%s: Address of %ps out of range of kernel_toc.\n",
- me->name, (void *)addr);
+ if ((unsigned long)entry->jump % 8 != 0) {
+ pr_err("%s: Address of stub entry is not 8-byte aligned\n", me->name);
return 0;
}
- entry->jump[1] |= PPC_HA(reladdr);
- entry->jump[2] |= PPC_LO(reladdr);
+ BUILD_BUG_ON(sizeof(stub_insns) > sizeof(entry->jump));
+ memcpy(entry->jump, stub_insns, sizeof(stub_insns));
+
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+ /* Stub uses address relative to kernel base (from the paca) */
+ reladdr = addr - local_paca->kernelbase;
+ if (reladdr > 0x1FFFFFFFFL || reladdr < -0x200000000L) {
+ pr_err("%s: Address of %ps out of range of 34-bit relative address.\n",
+ me->name, (void *)addr);
+ return 0;
+ }
+
+ entry->jump[2] |= IMM_H18(reladdr);
+ entry->jump[3] |= IMM_L(reladdr);
+ } else {
+ /* Stub uses address relative to kernel toc (from the paca) */
+ reladdr = addr - kernel_toc_addr();
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+ pr_err("%s: Address of %ps out of range of kernel_toc.\n",
+ me->name, (void *)addr);
+ return 0;
+ }
+
+ entry->jump[1] |= PPC_HA(reladdr);
+ entry->jump[2] |= PPC_LO(reladdr);
+ }
/* Even though we don't use funcdata in the stub, it's needed elsewhere. */
entry->funcdata = func_desc(addr);
@@ -415,7 +579,11 @@ static bool is_mprofile_ftrace_call(const char *name)
*/
static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me)
{
+#ifndef CONFIG_PPC_KERNEL_PCREL
return (sechdrs[me->arch.toc_section].sh_addr & ~0xfful) + 0x8000;
+#else
+ return -1;
+#endif
}
/* Patch stub to reference function and correct r2 value. */
@@ -432,28 +600,53 @@ static inline int create_stub(const Elf64_Shdr *sechdrs,
if (is_mprofile_ftrace_call(name))
return create_ftrace_stub(entry, addr, me);
+ if ((unsigned long)entry->jump % 8 != 0) {
+ pr_err("%s: Address of stub entry is not 8-byte aligned\n", me->name);
+ return 0;
+ }
+
+ BUILD_BUG_ON(sizeof(ppc64_stub_insns) > sizeof(entry->jump));
for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) {
if (patch_instruction(&entry->jump[i],
ppc_inst(ppc64_stub_insns[i])))
return 0;
}
- /* Stub uses address relative to r2. */
- reladdr = (unsigned long)entry - my_r2(sechdrs, me);
- if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
- pr_err("%s: Address %p of stub out of range of %p.\n",
- me->name, (void *)reladdr, (void *)my_r2);
- return 0;
- }
- pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+ /* Stub uses address relative to itself! */
+ reladdr = 0 + offsetof(struct ppc64_stub_entry, funcdata);
+ BUILD_BUG_ON(reladdr != 32);
+ if (reladdr > 0x1FFFFFFFFL || reladdr < -0x200000000L) {
+ pr_err("%s: Address of %p out of range of 34-bit relative address.\n",
+ me->name, (void *)reladdr);
+ return 0;
+ }
+ pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
- if (patch_instruction(&entry->jump[0],
- ppc_inst(entry->jump[0] | PPC_HA(reladdr))))
- return 0;
+ /* May not even need this if we're relative to 0 */
+ if (patch_instruction(&entry->jump[0],
+ ppc_inst_prefix(entry->jump[0] | IMM_H18(reladdr),
+ entry->jump[1] | IMM_L(reladdr))))
+ return 0;
- if (patch_instruction(&entry->jump[1],
- ppc_inst(entry->jump[1] | PPC_LO(reladdr))))
- return 0;
+ } else {
+ /* Stub uses address relative to r2. */
+ reladdr = (unsigned long)entry - my_r2(sechdrs, me);
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+ pr_err("%s: Address %p of stub out of range of %p.\n",
+ me->name, (void *)reladdr, (void *)my_r2);
+ return 0;
+ }
+ pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
+
+ if (patch_instruction(&entry->jump[0],
+ ppc_inst(entry->jump[0] | PPC_HA(reladdr))))
+ return 0;
+
+ if (patch_instruction(&entry->jump[1],
+ ppc_inst(entry->jump[1] | PPC_LO(reladdr))))
+ return 0;
+ }
// func_desc_t is 8 bytes if ABIv2, else 16 bytes
desc = func_desc(addr);
@@ -497,6 +690,37 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs,
return (unsigned long)&stubs[i];
}
+#ifdef CONFIG_PPC_KERNEL_PCREL
+/* Create GOT to load the location described in this ptr */
+static unsigned long got_for_addr(const Elf64_Shdr *sechdrs,
+ unsigned long addr,
+ struct module *me,
+ const char *name)
+{
+ struct ppc64_got_entry *got;
+ unsigned int i, num_got;
+
+ if (!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
+ return addr;
+
+ num_got = sechdrs[me->arch.got_section].sh_size / sizeof(*got);
+
+ /* Find this stub, or if that fails, the next avail. entry */
+ got = (void *)sechdrs[me->arch.got_section].sh_addr;
+ for (i = 0; got[i].addr; i++) {
+ if (WARN_ON(i >= num_got))
+ return 0;
+
+ if (got[i].addr == addr)
+ return (unsigned long)&got[i];
+ }
+
+ got[i].addr = addr;
+
+ return (unsigned long)&got[i];
+}
+#endif
+
/* We expect a noop next: if it is, replace it with instruction to
restore r2. */
static int restore_r2(const char *name, u32 *instruction, struct module *me)
@@ -504,6 +728,9 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me)
u32 *prev_insn = instruction - 1;
u32 insn_val = *instruction;
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
+ return 0;
+
if (is_mprofile_ftrace_call(name))
return 0;
@@ -549,6 +776,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
pr_debug("Applying ADD relocate section %u to %u\n", relsec,
sechdrs[relsec].sh_info);
+#ifndef CONFIG_PPC_KERNEL_PCREL
/* First time we're called, we can fix up .TOC. */
if (!me->arch.toc_fixed) {
sym = find_dot_toc(sechdrs, strtab, symindex);
@@ -558,7 +786,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
sym->st_value = my_r2(sechdrs, me);
me->arch.toc_fixed = true;
}
-
+#endif
for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
/* This is where to make the change */
location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
@@ -586,6 +814,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
*(unsigned long *)location = value;
break;
+#ifndef CONFIG_PPC_KERNEL_PCREL
case R_PPC64_TOC:
*(unsigned long *)location = my_r2(sechdrs, me);
break;
@@ -645,8 +874,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
= (*((uint16_t *) location) & ~0xffff)
| (value & 0xffff);
break;
+#endif
case R_PPC_REL24:
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ /* PCREL still generates REL24 for mcount */
+ case R_PPC64_REL24_NOTOC:
+#endif
/* FIXME: Handle weak symbols here --RR */
if (sym->st_shndx == SHN_UNDEF ||
sym->st_shndx == SHN_LIVEPATCH) {
@@ -694,6 +928,47 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
*(u32 *)location = value;
break;
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ case R_PPC64_PCREL34: {
+ unsigned long absvalue = value;
+
+ /* Convert value to relative */
+ value -= (unsigned long)location;
+
+ if (value + 0x200000000 > 0x3ffffffff) {
+ if (sym->st_shndx != me->arch.pcpu_section) {
+ pr_err("%s: REL34 %li out of range!\n",
+ me->name, (long)value);
+ return -ENOEXEC;
+ }
+
+ /*
+ * per-cpu section is special cased because
+ * it is moved during loading, so has to be
+ * converted to use GOT.
+ */
+ value = got_for_addr(sechdrs, absvalue, me,
+ strtab + sym->st_name);
+ if (!value)
+ return -ENOENT;
+ value -= (unsigned long)location;
+
+ /* Turn pla into pld */
+ if (patch_instruction((u32 *)location,
+ ppc_inst_prefix((*(u32 *)location & ~0x02000000),
+ (*((u32 *)location + 1) & ~0xf8000000) | 0xe4000000)))
+ return -EFAULT;
+ }
+
+ if (patch_instruction((u32 *)location,
+ ppc_inst_prefix((*(u32 *)location & ~0x3ffff) | IMM_H18(value),
+ (*((u32 *)location + 1) & ~0xffff) | IMM_L(value))))
+ return -EFAULT;
+
+ break;
+ }
+
+#else
case R_PPC64_TOCSAVE:
/*
* Marker reloc indicates we don't have to save r2.
@@ -701,8 +976,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
* it.
*/
break;
+#endif
case R_PPC64_ENTRY:
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
+ break;
+
/*
* Optimize ELFv2 large code model entry point if
* the TOC is within 2GB range of current location.
@@ -745,6 +1024,20 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
| (value & 0xffff);
break;
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ case R_PPC64_GOT_PCREL34:
+ value = got_for_addr(sechdrs, value, me,
+ strtab + sym->st_name);
+ if (!value)
+ return -ENOENT;
+ value -= (unsigned long)location;
+ ((uint32_t *)location)[0] = (((uint32_t *)location)[0] & ~0x3ffff) |
+ ((value >> 16) & 0x3ffff);
+ ((uint32_t *)location)[1] = (((uint32_t *)location)[1] & ~0xffff) |
+ (value & 0xffff);
+ break;
+#endif
+
default:
pr_err("%s: Unknown ADD relocation: %lu\n",
me->name,
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index be8db402e963..cda4e00b67c1 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -191,7 +191,9 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
#endif
new_paca->lock_token = 0x8000;
new_paca->paca_index = cpu;
+#ifndef CONFIG_PPC_KERNEL_PCREL
new_paca->kernel_toc = kernel_toc_addr();
+#endif
new_paca->kernelbase = (unsigned long) _stext;
/* Only set MSR:IR/DR when MMU is initialized */
new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR);
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index fd42059ae2a5..e27342ef128b 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -73,7 +73,7 @@ static int __init pcibios_init(void)
return 0;
}
-subsys_initcall(pcibios_init);
+subsys_initcall_sync(pcibios_init);
int pcibios_unmap_io_space(struct pci_bus *bus)
{
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 4b29ac5ddac6..1fefafb2b29b 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1630,7 +1630,7 @@ void arch_setup_new_exec(void)
}
#ifdef CONFIG_PPC64
-/**
+/*
* Assign a TIDR (thread ID) for task @t and set it in the thread
* structure. For now, we only support setting TIDR for 'current' task.
*
@@ -1738,68 +1738,83 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
*/
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
- unsigned long clone_flags = args->flags;
- unsigned long usp = args->stack;
- unsigned long tls = args->tls;
- struct pt_regs *childregs, *kregs;
+ struct pt_regs *kregs; /* Switch frame regs */
extern void ret_from_fork(void);
extern void ret_from_fork_scv(void);
- extern void ret_from_kernel_thread(void);
+ extern void ret_from_kernel_user_thread(void);
+ extern void start_kernel_thread(void);
void (*f)(void);
unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
- struct thread_info *ti = task_thread_info(p);
#ifdef CONFIG_HAVE_HW_BREAKPOINT
int i;
#endif
klp_init_thread_info(p);
- /* Create initial stack frame. */
- sp -= STACK_USER_INT_FRAME_SIZE;
- *(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER;
-
- /* Copy registers */
- childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS);
- if (unlikely(args->fn)) {
+ if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
+
+ /* Create initial minimum stack frame. */
+ sp -= STACK_FRAME_MIN_SIZE;
((unsigned long *)sp)[0] = 0;
- memset(childregs, 0, sizeof(struct pt_regs));
- childregs->gpr[1] = sp + STACK_USER_INT_FRAME_SIZE;
- /* function */
- if (args->fn)
- childregs->gpr[14] = ppc_function_entry((void *)args->fn);
-#ifdef CONFIG_PPC64
- clear_tsk_thread_flag(p, TIF_32BIT);
- childregs->softe = IRQS_ENABLED;
-#endif
- childregs->gpr[15] = (unsigned long)args->fn_arg;
+
+ f = start_kernel_thread;
p->thread.regs = NULL; /* no user register state */
- ti->flags |= _TIF_RESTOREALL;
- f = ret_from_kernel_thread;
+ clear_tsk_compat_task(p);
} else {
/* user thread */
- struct pt_regs *regs = current_pt_regs();
- *childregs = *regs;
- if (usp)
- childregs->gpr[1] = usp;
- ((unsigned long *)sp)[0] = childregs->gpr[1];
- p->thread.regs = childregs;
- /* 64s sets this in ret_from_fork */
- if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64))
- childregs->gpr[3] = 0; /* Result from fork() */
- if (clone_flags & CLONE_SETTLS) {
- if (!is_32bit_task())
- childregs->gpr[13] = tls;
+ struct pt_regs *childregs;
+
+ /* Create initial user return stack frame. */
+ sp -= STACK_USER_INT_FRAME_SIZE;
+ *(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER;
+
+ childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS);
+
+ if (unlikely(args->fn)) {
+ /*
+ * A user space thread, but it first runs a kernel
+ * thread, and then returns as though it had called
+ * execve rather than fork, so user regs will be
+ * filled in (e.g., by kernel_execve()).
+ */
+ ((unsigned long *)sp)[0] = 0;
+ memset(childregs, 0, sizeof(struct pt_regs));
+#ifdef CONFIG_PPC64
+ childregs->softe = IRQS_ENABLED;
+#endif
+ f = ret_from_kernel_user_thread;
+ } else {
+ struct pt_regs *regs = current_pt_regs();
+ unsigned long clone_flags = args->flags;
+ unsigned long usp = args->stack;
+
+ /* Copy registers */
+ *childregs = *regs;
+ if (usp)
+ childregs->gpr[1] = usp;
+ ((unsigned long *)sp)[0] = childregs->gpr[1];
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ WARN_ON_ONCE(childregs->softe != IRQS_ENABLED);
+#endif
+ if (clone_flags & CLONE_SETTLS) {
+ unsigned long tls = args->tls;
+
+ if (!is_32bit_task())
+ childregs->gpr[13] = tls;
+ else
+ childregs->gpr[2] = tls;
+ }
+
+ if (trap_is_scv(regs))
+ f = ret_from_fork_scv;
else
- childregs->gpr[2] = tls;
+ f = ret_from_fork;
}
- if (trap_is_scv(regs))
- f = ret_from_fork_scv;
- else
- f = ret_from_fork;
+ childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
+ p->thread.regs = childregs;
}
- childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
/*
* The way this works is that at some point in the future
@@ -1813,6 +1828,16 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
sp -= STACK_SWITCH_FRAME_SIZE;
((unsigned long *)sp)[0] = sp + STACK_SWITCH_FRAME_SIZE;
kregs = (struct pt_regs *)(sp + STACK_SWITCH_FRAME_REGS);
+ kregs->nip = ppc_function_entry(f);
+ if (unlikely(args->fn)) {
+ /*
+ * Put kthread fn, arg parameters in non-volatile GPRs in the
+ * switch frame so they are loaded by _switch before it returns
+ * to ret_from_kernel_thread.
+ */
+ kregs->gpr[14] = ppc_function_entry((void *)args->fn);
+ kregs->gpr[15] = (unsigned long)args->fn_arg;
+ }
p->thread.ksp = sp;
#ifdef CONFIG_HAVE_HW_BREAKPOINT
@@ -1840,22 +1865,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
p->thread.dscr_inherit = current->thread.dscr_inherit;
p->thread.dscr = mfspr(SPRN_DSCR);
}
- if (cpu_has_feature(CPU_FTR_HAS_PPR))
- childregs->ppr = DEFAULT_PPR;
p->thread.tidr = 0;
#endif
- /*
- * Run with the current AMR value of the kernel
- */
-#ifdef CONFIG_PPC_PKEY
- if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
- kregs->amr = AMR_KUAP_BLOCKED;
-
- if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP))
- kregs->iamr = AMR_KUEP_BLOCKED;
-#endif
- kregs->nip = ppc_function_entry(f);
return 0;
}
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 31175b34856a..c087eeee320f 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -16,6 +16,7 @@
#include <linux/init.h>
#include <linux/kconfig.h>
#include <linux/kernel.h>
+#include <linux/lockdep.h>
#include <linux/memblock.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
@@ -68,7 +69,7 @@ struct rtas_filter {
* functions are believed to have no users on
* ppc64le, and we want to keep it that way. It does
* not make sense for this to be set when @filter
- * is false.
+ * is NULL.
*/
struct rtas_function {
s32 token;
@@ -453,6 +454,16 @@ static struct rtas_function rtas_function_table[] __ro_after_init = {
},
};
+/*
+ * Nearly all RTAS calls need to be serialized. All uses of the
+ * default rtas_args block must hold rtas_lock.
+ *
+ * Exceptions to the RTAS serialization requirement (e.g. stop-self)
+ * must use a separate rtas_args structure.
+ */
+static DEFINE_RAW_SPINLOCK(rtas_lock);
+static struct rtas_args rtas_args;
+
/**
* rtas_function_token() - RTAS function token lookup.
* @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN.
@@ -560,6 +571,9 @@ static void __do_enter_rtas(struct rtas_args *args)
static void __do_enter_rtas_trace(struct rtas_args *args)
{
const char *name = NULL;
+
+ if (args == &rtas_args)
+ lockdep_assert_held(&rtas_lock);
/*
* If the tracepoints that consume the function name aren't
* active, avoid the lookup.
@@ -619,16 +633,6 @@ static void do_enter_rtas(struct rtas_args *args)
struct rtas_t rtas;
-/*
- * Nearly all RTAS calls need to be serialized. All uses of the
- * default rtas_args block must hold rtas_lock.
- *
- * Exceptions to the RTAS serialization requirement (e.g. stop-self)
- * must use a separate rtas_args structure.
- */
-static DEFINE_RAW_SPINLOCK(rtas_lock);
-static struct rtas_args rtas_args;
-
DEFINE_SPINLOCK(rtas_data_buf_lock);
EXPORT_SYMBOL_GPL(rtas_data_buf_lock);
@@ -951,6 +955,8 @@ static char *__fetch_rtas_last_error(char *altbuf)
u32 bufsz;
char *buf = NULL;
+ lockdep_assert_held(&rtas_lock);
+
if (token == -1)
return NULL;
@@ -981,7 +987,7 @@ static char *__fetch_rtas_last_error(char *altbuf)
buf = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC);
}
if (buf)
- memcpy(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX);
+ memmove(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX);
}
return buf;
@@ -1016,6 +1022,23 @@ va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret,
do_enter_rtas(args);
}
+/**
+ * rtas_call_unlocked() - Invoke an RTAS firmware function without synchronization.
+ * @args: RTAS parameter block to be used for the call, must obey RTAS addressing
+ * constraints.
+ * @token: Identifies the function being invoked.
+ * @nargs: Number of input parameters. Does not include token.
+ * @nret: Number of output parameters, including the call status.
+ * @....: List of @nargs input parameters.
+ *
+ * Invokes the RTAS function indicated by @token, which the caller
+ * should obtain via rtas_function_token().
+ *
+ * This function is similar to rtas_call(), but must be used with a
+ * limited set of RTAS calls specifically exempted from the general
+ * requirement that only one RTAS call may be in progress at any
+ * time. Examples include stop-self and ibm,nmi-interlock.
+ */
void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...)
{
va_list list;
@@ -1091,6 +1114,7 @@ static bool token_is_restricted_errinjct(s32 token)
*/
int rtas_call(int token, int nargs, int nret, int *outputs, ...)
{
+ struct pin_cookie cookie;
va_list list;
int i;
unsigned long flags;
@@ -1117,6 +1141,8 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
}
raw_spin_lock_irqsave(&rtas_lock, flags);
+ cookie = lockdep_pin_lock(&rtas_lock);
+
/* We use the global rtas args buffer */
args = &rtas_args;
@@ -1134,6 +1160,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
outputs[i] = be32_to_cpu(args->rets[i + 1]);
ret = (nret > 0) ? be32_to_cpu(args->rets[0]) : 0;
+ lockdep_unpin_lock(&rtas_lock, cookie);
raw_spin_unlock_irqrestore(&rtas_lock, flags);
if (buff_copy) {
@@ -1765,6 +1792,7 @@ err:
/* We assume to be passed big endian arguments */
SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
{
+ struct pin_cookie cookie;
struct rtas_args args;
unsigned long flags;
char *buff_copy, *errbuf = NULL;
@@ -1833,6 +1861,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
buff_copy = get_errorlog_buffer();
raw_spin_lock_irqsave(&rtas_lock, flags);
+ cookie = lockdep_pin_lock(&rtas_lock);
rtas_args = args;
do_enter_rtas(&rtas_args);
@@ -1843,6 +1872,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
if (be32_to_cpu(args.rets[0]) == -1)
errbuf = __fetch_rtas_last_error(buff_copy);
+ lockdep_unpin_lock(&rtas_lock, cookie);
raw_spin_unlock_irqrestore(&rtas_lock, flags);
if (buff_copy) {
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index e77734e5a127..d2a446216444 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -630,13 +630,14 @@ static __init void probe_machine(void)
for (machine_id = &__machine_desc_start;
machine_id < &__machine_desc_end;
machine_id++) {
- DBG(" %s ...", machine_id->name);
+ DBG(" %s ...\n", machine_id->name);
+ if (machine_id->compatible && !of_machine_is_compatible(machine_id->compatible))
+ continue;
memcpy(&ppc_md, machine_id, sizeof(struct machdep_calls));
- if (ppc_md.probe()) {
- DBG(" match !\n");
- break;
- }
- DBG("\n");
+ if (ppc_md.probe && !ppc_md.probe())
+ continue;
+ DBG(" %s match !\n", machine_id->name);
+ break;
}
/* What can we do if we didn't find ? */
if (machine_id >= &__machine_desc_end) {
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 9d8665910350..df20cf201f74 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -887,7 +887,11 @@ void __init time_init(void)
unsigned shift;
/* Normal PowerPC with timebase register */
- ppc_md.calibrate_decr();
+ if (ppc_md.calibrate_decr)
+ ppc_md.calibrate_decr();
+ else
+ generic_calibrate_decr();
+
printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n",
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 7b85c3b460a3..a47f30373423 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -194,6 +194,8 @@ __ftrace_make_nop(struct module *mod,
* get corrupted.
*
* Use a b +8 to jump over the load.
+ * XXX: could make PCREL depend on MPROFILE_KERNEL
+ * XXX: check PCREL && MPROFILE_KERNEL calling sequence
*/
if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32))
pop = ppc_inst(PPC_RAW_NOP());
@@ -725,6 +727,15 @@ int __init ftrace_dyn_arch_init(void)
{
int i;
unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init };
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ u32 stub_insns[] = {
+ /* pla r12,addr */
+ PPC_PREFIX_MLS | __PPC_PRFX_R(1),
+ PPC_INST_PADDI | ___PPC_RT(_R12),
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR()
+ };
+#else
u32 stub_insns[] = {
PPC_RAW_LD(_R12, _R13, PACATOC),
PPC_RAW_ADDIS(_R12, _R12, 0),
@@ -732,6 +743,8 @@ int __init ftrace_dyn_arch_init(void)
PPC_RAW_MTCTR(_R12),
PPC_RAW_BCTR()
};
+#endif
+
unsigned long addr;
long reladdr;
@@ -740,19 +753,36 @@ int __init ftrace_dyn_arch_init(void)
else
addr = ppc_global_function_entry((void *)ftrace_caller);
- reladdr = addr - kernel_toc_addr();
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+ for (i = 0; i < 2; i++) {
+ reladdr = addr - (unsigned long)tramp[i];
- if (reladdr >= SZ_2G || reladdr < -(long)SZ_2G) {
- pr_err("Address of %ps out of range of kernel_toc.\n",
+ if (reladdr >= (long)SZ_8G || reladdr < -(long)SZ_8G) {
+ pr_err("Address of %ps out of range of pcrel address.\n",
+ (void *)addr);
+ return -1;
+ }
+
+ memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+ tramp[i][0] |= IMM_H18(reladdr);
+ tramp[i][1] |= IMM_L(reladdr);
+ add_ftrace_tramp((unsigned long)tramp[i]);
+ }
+ } else {
+ reladdr = addr - kernel_toc_addr();
+
+ if (reladdr >= (long)SZ_2G || reladdr < -(long)SZ_2G) {
+ pr_err("Address of %ps out of range of kernel_toc.\n",
(void *)addr);
- return -1;
- }
+ return -1;
+ }
- for (i = 0; i < 2; i++) {
- memcpy(tramp[i], stub_insns, sizeof(stub_insns));
- tramp[i][1] |= PPC_HA(reladdr);
- tramp[i][2] |= PPC_LO(reladdr);
- add_ftrace_tramp((unsigned long)tramp[i]);
+ for (i = 0; i < 2; i++) {
+ memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+ tramp[i][1] |= PPC_HA(reladdr);
+ tramp[i][2] |= PPC_LO(reladdr);
+ add_ftrace_tramp((unsigned long)tramp[i]);
+ }
}
return 0;
diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S
index 0c4ecc8fec5a..48fc6658053a 100644
--- a/arch/powerpc/kernel/vdso/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso/gettimeofday.S
@@ -38,7 +38,11 @@
.else
addi r4, r5, VDSO_DATA_OFFSET
.endif
- bl DOTSYM(\funct)
+#ifdef __powerpc64__
+ bl CFUNC(DOTSYM(\funct))
+#else
+ bl \funct
+#endif
PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
#ifdef __powerpc64__
PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1)
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index ffe5d90abe17..fcc0ad6d9c7b 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -177,10 +177,16 @@ fpone:
fphalf:
.quad 0x3fe0000000000000 /* 0.5 */
+#ifdef CONFIG_PPC_KERNEL_PCREL
+#define LDCONST(fr, name) \
+ pla r11,name@pcrel; \
+ lfd fr,0(r11)
+#else
#define LDCONST(fr, name) \
addis r11,r2,name@toc@ha; \
lfd fr,name@toc@l(r11)
#endif
+#endif
.text
/*
* Internal routine to enable floating point and set FPSCR to 0.
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index ee86753e444e..13614f0b269c 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -169,12 +169,18 @@ SECTIONS
}
#else /* CONFIG_PPC32 */
+#ifndef CONFIG_PPC_KERNEL_PCREL
.toc1 : AT(ADDR(.toc1) - LOAD_OFFSET) {
*(.toc1)
}
+#endif
.got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) {
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ *(.got)
+#else
*(.got .toc)
+#endif
}
SOFT_MASK_TABLE(8)