summaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kconfig8
-rw-r--r--arch/powerpc/Kconfig.debug6
-rw-r--r--arch/powerpc/boot/dts/acadia.dts2
-rw-r--r--arch/powerpc/configs/powernv_defconfig2
-rw-r--r--arch/powerpc/configs/pseries_defconfig1
-rw-r--r--arch/powerpc/configs/skiroot_defconfig232
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu-hash.h2
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu.h2
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-hash.h22
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-radix.h3
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush.h15
-rw-r--r--arch/powerpc/include/asm/cputable.h12
-rw-r--r--arch/powerpc/include/asm/eeh.h10
-rw-r--r--arch/powerpc/include/asm/emulated_ops.h4
-rw-r--r--arch/powerpc/include/asm/epapr_hcalls.h12
-rw-r--r--arch/powerpc/include/asm/exception-64s.h5
-rw-r--r--arch/powerpc/include/asm/hugetlb.h6
-rw-r--r--arch/powerpc/include/asm/hw_irq.h1
-rw-r--r--arch/powerpc/include/asm/kprobes.h2
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_asm.h4
-rw-r--r--arch/powerpc/include/asm/mce.h4
-rw-r--r--arch/powerpc/include/asm/mmu_context.h50
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable.h2
-rw-r--r--arch/powerpc/include/asm/opal-api.h3
-rw-r--r--arch/powerpc/include/asm/opal.h6
-rw-r--r--arch/powerpc/include/asm/paca.h13
-rw-r--r--arch/powerpc/include/asm/page_64.h6
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h1
-rw-r--r--arch/powerpc/include/asm/pgtable-be-types.h2
-rw-r--r--arch/powerpc/include/asm/pgtable-types.h4
-rw-r--r--arch/powerpc/include/asm/powernv.h4
-rw-r--r--arch/powerpc/include/asm/ppc_asm.h27
-rw-r--r--arch/powerpc/include/asm/processor.h3
-rw-r--r--arch/powerpc/include/asm/string.h2
-rw-r--r--arch/powerpc/include/asm/switch_to.h5
-rw-r--r--arch/powerpc/include/asm/tlbflush.h2
-rw-r--r--arch/powerpc/include/asm/tm.h7
-rw-r--r--arch/powerpc/include/asm/topology.h8
-rw-r--r--arch/powerpc/include/asm/uaccess.h22
-rw-r--r--arch/powerpc/include/asm/vas.h21
-rw-r--r--arch/powerpc/include/uapi/asm/cputable.h1
-rw-r--r--arch/powerpc/kernel/Makefile2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c6
-rw-r--r--arch/powerpc/kernel/cputable.c24
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c4
-rw-r--r--arch/powerpc/kernel/eeh.c46
-rw-r--r--arch/powerpc/kernel/eeh_driver.c2
-rw-r--r--arch/powerpc/kernel/eeh_pe.c8
-rw-r--r--arch/powerpc/kernel/entry_64.S4
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S49
-rw-r--r--arch/powerpc/kernel/fadump.c17
-rw-r--r--arch/powerpc/kernel/head_32.S2
-rw-r--r--arch/powerpc/kernel/head_64.S16
-rw-r--r--arch/powerpc/kernel/idle_book3s.S70
-rw-r--r--arch/powerpc/kernel/irq.c51
-rw-r--r--arch/powerpc/kernel/kprobes-ftrace.c34
-rw-r--r--arch/powerpc/kernel/kprobes.c92
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c4
-rw-r--r--arch/powerpc/kernel/mce.c147
-rw-r--r--arch/powerpc/kernel/mce_power.c115
-rw-r--r--arch/powerpc/kernel/module_64.c3
-rw-r--r--arch/powerpc/kernel/optprobes.c15
-rw-r--r--arch/powerpc/kernel/paca.c16
-rw-r--r--arch/powerpc/kernel/pci_64.c4
-rw-r--r--arch/powerpc/kernel/process.c225
-rw-r--r--arch/powerpc/kernel/prom.c37
-rw-r--r--arch/powerpc/kernel/setup-common.c7
-rw-r--r--arch/powerpc/kernel/setup.h6
-rw-r--r--arch/powerpc/kernel/setup_64.c19
-rw-r--r--arch/powerpc/kernel/signal.c2
-rw-r--r--arch/powerpc/kernel/signal_32.c6
-rw-r--r--arch/powerpc/kernel/signal_64.c7
-rw-r--r--arch/powerpc/kernel/sysfs.c11
-rw-r--r--arch/powerpc/kernel/tau_6xx.c3
-rw-r--r--arch/powerpc/kernel/tm.S59
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_mprofile.S4
-rw-r--r--arch/powerpc/kernel/traps.c256
-rw-r--r--arch/powerpc/kernel/watchdog.c29
-rw-r--r--arch/powerpc/kvm/book3s_hv.c20
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S8
-rw-r--r--arch/powerpc/kvm/powerpc.c3
-rw-r--r--arch/powerpc/lib/Makefile2
-rw-r--r--arch/powerpc/lib/pmem.c67
-rw-r--r--arch/powerpc/lib/sstep.c20
-rw-r--r--arch/powerpc/mm/Makefile6
-rw-r--r--arch/powerpc/mm/dump_hashpagetable.c2
-rw-r--r--arch/powerpc/mm/dump_linuxpagetables.c10
-rw-r--r--arch/powerpc/mm/hash_utils_64.c1
-rw-r--r--arch/powerpc/mm/hugetlbpage-radix.c20
-rw-r--r--arch/powerpc/mm/init_64.c21
-rw-r--r--arch/powerpc/mm/mmap.c49
-rw-r--r--arch/powerpc/mm/mmu_context.c9
-rw-r--r--arch/powerpc/mm/mmu_context_book3s64.c33
-rw-r--r--arch/powerpc/mm/numa.c63
-rw-r--r--arch/powerpc/mm/pgtable-radix.c10
-rw-r--r--arch/powerpc/mm/pgtable_64.c2
-rw-r--r--arch/powerpc/mm/slb_low.S6
-rw-r--r--arch/powerpc/mm/slice.c62
-rw-r--r--arch/powerpc/mm/tlb-radix.c347
-rw-r--r--arch/powerpc/net/bpf_jit64.h7
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c16
-rw-r--r--arch/powerpc/oprofile/op_model_cell.c8
-rw-r--r--arch/powerpc/perf/hv-24x7.c2
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype19
-rw-r--r--arch/powerpc/platforms/powermac/low_i2c.c4
-rw-r--r--arch/powerpc/platforms/powernv/Makefile3
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c42
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c28
-rw-r--r--arch/powerpc/platforms/powernv/opal-async.c180
-rw-r--r--arch/powerpc/platforms/powernv/opal-hmi.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-irqchip.c8
-rw-r--r--arch/powerpc/platforms/powernv/opal-memory-errors.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-sensor.c17
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S5
-rw-r--r--arch/powerpc/platforms/powernv/opal.c2
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c29
-rw-r--r--arch/powerpc/platforms/powernv/pci.h4
-rw-r--r--arch/powerpc/platforms/powernv/setup.c26
-rw-r--r--arch/powerpc/platforms/powernv/smp.c59
-rw-r--r--arch/powerpc/platforms/powernv/vas-debug.c209
-rw-r--r--arch/powerpc/platforms/powernv/vas-window.c242
-rw-r--r--arch/powerpc/platforms/powernv/vas.c31
-rw-r--r--arch/powerpc/platforms/powernv/vas.h93
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c2
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c19
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c8
-rw-r--r--arch/powerpc/platforms/pseries/lparcfg.c2
-rw-r--r--arch/powerpc/platforms/pseries/vio.c2
-rw-r--r--arch/powerpc/sysdev/axonram.c2
-rw-r--r--arch/powerpc/sysdev/ipic.c4
-rw-r--r--arch/powerpc/xmon/xmon.c169
131 files changed, 3026 insertions, 927 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index cb782ac1c35d..c51e6ce42e7a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -139,9 +139,11 @@ config PPC
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
+ select ARCH_HAS_PMEM_API if PPC64
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
+ select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAS_ZONE_DEVICE if PPC_BOOK3S_64
select ARCH_HAVE_NMI_SAFE_CMPXCHG
@@ -335,7 +337,7 @@ config PPC_OF_PLATFORM_PCI
default n
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
- depends on PPC32 || PPC_STD_MMU_64
+ depends on PPC32 || PPC_BOOK3S_64
def_bool y
config ARCH_SUPPORTS_UPROBES
@@ -722,7 +724,7 @@ config PPC_16K_PAGES
config PPC_64K_PAGES
bool "64k page size"
- depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64)
+ depends on !PPC_FSL_BOOK3E && (44x || PPC_BOOK3S_64 || PPC_BOOK3E_64)
select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
config PPC_256K_PAGES
@@ -781,7 +783,7 @@ config FORCE_MAX_ZONEORDER
config PPC_SUBPAGE_PROT
bool "Support setting protections for 4k subpages"
- depends on PPC_STD_MMU_64 && PPC_64K_PAGES
+ depends on PPC_BOOK3S_64 && PPC_64K_PAGES
help
This option adds support for a system call to allow user programs
to set access permissions (read/write, readonly, or no access)
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index be1c8c5beb61..657c33cd4eee 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -370,4 +370,10 @@ config PPC_HTDUMP
def_bool y
depends on PPC_PTDUMP && PPC_BOOK3S
+config PPC_FAST_ENDIAN_SWITCH
+ bool "Deprecated fast endian-switch syscall"
+ depends on DEBUG_KERNEL && PPC_BOOK3S_64
+ help
+ If you're unsure what this is, say N.
+
endmenu
diff --git a/arch/powerpc/boot/dts/acadia.dts b/arch/powerpc/boot/dts/acadia.dts
index 57291f61ffe7..86266159521e 100644
--- a/arch/powerpc/boot/dts/acadia.dts
+++ b/arch/powerpc/boot/dts/acadia.dts
@@ -183,7 +183,7 @@
usb@ef603000 {
compatible = "ohci-be";
reg = <0xef603000 0x80>;
- interrupts-parent = <&UIC0>;
+ interrupt-parent = <&UIC0>;
interrupts = <0xd 0x4 0xe 0x4>;
};
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index caee834760d2..4891bbed6258 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -192,6 +192,7 @@ CONFIG_IPMI_DEVICE_INTERFACE=y
CONFIG_IPMI_POWERNV=y
CONFIG_RAW_DRIVER=y
CONFIG_MAX_RAW_DEVS=1024
+CONFIG_I2C_CHARDEV=y
CONFIG_DRM=y
CONFIG_DRM_AST=y
CONFIG_FIRMWARE_EDID=y
@@ -295,6 +296,7 @@ CONFIG_FUNCTION_GRAPH_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_PPC_EMULATED_STATS=y
CONFIG_CODE_PATCHING_SELFTEST=y
CONFIG_FTR_FIXUP_SELFTEST=y
CONFIG_MSI_BITMAP_SELFTEST=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 3d935969e5a2..bde2cd1005a2 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -193,6 +193,7 @@ CONFIG_VIRTIO_CONSOLE=m
CONFIG_IBM_BSR=m
CONFIG_RAW_DRIVER=y
CONFIG_MAX_RAW_DEVS=1024
+CONFIG_I2C_CHARDEV=y
CONFIG_FB=y
CONFIG_FIRMWARE_EDID=y
CONFIG_FB_OF=y
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
new file mode 100644
index 000000000000..6bd5e7261335
--- /dev/null
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -0,0 +1,232 @@
+CONFIG_PPC64=y
+CONFIG_ALTIVEC=y
+CONFIG_VSX=y
+CONFIG_NR_CPUS=2048
+CONFIG_CPU_LITTLE_ENDIAN=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=20
+CONFIG_RELAY=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_GZIP is not set
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_JUMP_LABEL=y
+CONFIG_STRICT_KERNEL_RWX=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_SIG=y
+CONFIG_MODULE_SIG_FORCE=y
+CONFIG_MODULE_SIG_SHA512=y
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_PPC_PSERIES is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_IDLE=y
+CONFIG_HZ_100=y
+CONFIG_KEXEC=y
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_NUMA=y
+# CONFIG_COMPACTION is not set
+# CONFIG_MIGRATION is not set
+# CONFIG_BOUNCE is not set
+CONFIG_PPC_64K_PAGES=y
+CONFIG_SCHED_SMT=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=tty0 console=hvc0 powersave=off"
+# CONFIG_SECCOMP is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_NET_IPIP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+CONFIG_DNS_RESOLVER=y
+# CONFIG_WIRELESS is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_MTD=m
+CONFIG_MTD_POWERNV_FLASH=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_VIRTIO_BLK=m
+CONFIG_BLK_DEV_NVME=m
+CONFIG_EEPROM_AT24=y
+# CONFIG_CXL is not set
+CONFIG_BLK_DEV_SD=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=m
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_CXGB3_ISCSI=m
+CONFIG_SCSI_CXGB4_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_BE2ISCSI=m
+CONFIG_SCSI_AACRAID=m
+CONFIG_MEGARAID_NEWGEN=y
+CONFIG_MEGARAID_MM=m
+CONFIG_MEGARAID_MAILBOX=m
+CONFIG_MEGARAID_SAS=m
+CONFIG_SCSI_MPT2SAS=m
+CONFIG_SCSI_IPR=m
+# CONFIG_SCSI_IPR_TRACE is not set
+# CONFIG_SCSI_IPR_DUMP is not set
+CONFIG_SCSI_QLA_FC=m
+CONFIG_SCSI_QLA_ISCSI=m
+CONFIG_SCSI_LPFC=m
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+# CONFIG_ATA_SFF is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID456=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_ACENIC=m
+CONFIG_ACENIC_OMIT_TIGON_I=y
+CONFIG_TIGON3=y
+CONFIG_BNX2X=m
+CONFIG_CHELSIO_T1=y
+CONFIG_BE2NET=m
+CONFIG_S2IO=m
+CONFIG_E100=m
+CONFIG_E1000=m
+CONFIG_E1000E=m
+CONFIG_IXGB=m
+CONFIG_IXGBE=m
+CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
+CONFIG_MYRI10GE=m
+CONFIG_QLGE=m
+CONFIG_NETXEN_NIC=m
+CONFIG_SFC=m
+# CONFIG_USB_NET_DRIVERS is not set
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_MISC=y
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_DEVMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_IPMI_HANDLER=y
+CONFIG_IPMI_DEVICE_INTERFACE=y
+CONFIG_IPMI_POWERNV=y
+CONFIG_HW_RANDOM=y
+CONFIG_TCG_TIS_I2C_NUVOTON=y
+# CONFIG_I2C_COMPAT is not set
+CONFIG_I2C_CHARDEV=y
+# CONFIG_I2C_HELPER_AUTO is not set
+CONFIG_DRM=y
+CONFIG_DRM_RADEON=y
+CONFIG_DRM_AST=m
+CONFIG_FIRMWARE_EDID=y
+CONFIG_FB_MODE_HELPERS=y
+CONFIG_FB_OF=y
+CONFIG_FB_MATROX=y
+CONFIG_FB_MATROX_MILLENIUM=y
+CONFIG_FB_MATROX_MYSTIQUE=y
+CONFIG_FB_MATROX_G=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_GENERIC is not set
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_USB_HIDDEV=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_XHCI_HCD=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_GENERIC=m
+CONFIG_VIRT_DRIVERS=y
+CONFIG_VIRTIO_PCI=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT4_FS=m
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_ISO9660_FS=m
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_CRC16=y
+CONFIG_CRC_ITU_T=y
+CONFIG_LIBCRC32C=y
+CONFIG_PRINTK_TIME=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_WQ_WATCHDOG=y
+CONFIG_SCHEDSTATS=y
+# CONFIG_FTRACE is not set
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
+CONFIG_SECURITY=y
+CONFIG_IMA=y
+CONFIG_EVM=y
+# CONFIG_CRYPTO_ECHAINIV is not set
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_CMAC=y
+CONFIG_CRYPTO_MD4=y
+CONFIG_CRYPTO_ARC4=y
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 508275bb05d5..e91e115a816f 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -606,7 +606,7 @@ extern void slb_set_size(u16 size);
/* 4 bits per slice and we have one slice per 1TB */
#define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41)
-#define TASK_SLICE_ARRAY_SZ(x) ((x)->context.addr_limit >> 41)
+#define TASK_SLICE_ARRAY_SZ(x) ((x)->context.slb_addr_limit >> 41)
#ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 37fdede5a24c..c9448e19847a 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -93,7 +93,7 @@ typedef struct {
#ifdef CONFIG_PPC_MM_SLICES
u64 low_slices_psize; /* SLB page size encodings */
unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
- unsigned long addr_limit;
+ unsigned long slb_addr_limit;
#else
u16 sllp; /* SLB page size encoding */
#endif
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
index 42178897a050..849ecaae9e79 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -66,6 +66,28 @@ static inline void hash__flush_tlb_mm(struct mm_struct *mm)
{
}
+static inline void hash__local_flush_all_mm(struct mm_struct *mm)
+{
+ /*
+ * There's no Page Walk Cache for hash, so what is needed is
+ * the same as flush_tlb_mm(), which doesn't really make sense
+ * with hash. So the only thing we could do is flush the
+ * entire LPID! Punt for now, as it's not being used.
+ */
+ WARN_ON_ONCE(1);
+}
+
+static inline void hash__flush_all_mm(struct mm_struct *mm)
+{
+ /*
+ * There's no Page Walk Cache for hash, so what is needed is
+ * the same as flush_tlb_mm(), which doesn't really make sense
+ * with hash. So the only thing we could do is flush the
+ * entire LPID! Punt for now, as it's not being used.
+ */
+ WARN_ON_ONCE(1);
+}
+
static inline void hash__local_flush_tlb_page(struct vm_area_struct *vma,
unsigned long vmaddr)
{
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index c2115dfcef0c..6a9e68003387 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -22,17 +22,20 @@ extern void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long sta
extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end);
extern void radix__local_flush_tlb_mm(struct mm_struct *mm);
+extern void radix__local_flush_all_mm(struct mm_struct *mm);
extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
extern void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
int psize);
extern void radix__tlb_flush(struct mmu_gather *tlb);
#ifdef CONFIG_SMP
extern void radix__flush_tlb_mm(struct mm_struct *mm);
+extern void radix__flush_all_mm(struct mm_struct *mm);
extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
int psize);
#else
#define radix__flush_tlb_mm(mm) radix__local_flush_tlb_mm(mm)
+#define radix__flush_all_mm(mm) radix__local_flush_all_mm(mm)
#define radix__flush_tlb_page(vma,addr) radix__local_flush_tlb_page(vma,addr)
#define radix__flush_tlb_page_psize(mm,addr,p) radix__local_flush_tlb_page_psize(mm,addr,p)
#endif
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index fcffddbb3102..58b576f654b3 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -58,6 +58,13 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma,
return hash__local_flush_tlb_page(vma, vmaddr);
}
+static inline void local_flush_all_mm(struct mm_struct *mm)
+{
+ if (radix_enabled())
+ return radix__local_flush_all_mm(mm);
+ return hash__local_flush_all_mm(mm);
+}
+
static inline void tlb_flush(struct mmu_gather *tlb)
{
if (radix_enabled())
@@ -80,9 +87,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
return radix__flush_tlb_page(vma, vmaddr);
return hash__flush_tlb_page(vma, vmaddr);
}
+
+static inline void flush_all_mm(struct mm_struct *mm)
+{
+ if (radix_enabled())
+ return radix__flush_all_mm(mm);
+ return hash__flush_all_mm(mm);
+}
#else
#define flush_tlb_mm(mm) local_flush_tlb_mm(mm)
#define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr)
+#define flush_all_mm(mm) local_flush_all_mm(mm)
#endif /* CONFIG_SMP */
/*
* flush the page walk cache for the address
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 53b31c2bcdf4..0546663a98db 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -207,7 +207,7 @@ enum {
#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0004000000000000)
#define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0008000000000000)
#define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0010000000000000)
-#define CPU_FTR_ICSWX LONG_ASM_CONST(0x0020000000000000)
+/* Free LONG_ASM_CONST(0x0020000000000000) */
#define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0040000000000000)
#define CPU_FTR_TM LONG_ASM_CONST(0x0080000000000000)
#define CPU_FTR_CFAR LONG_ASM_CONST(0x0100000000000000)
@@ -216,6 +216,7 @@ enum {
#define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000)
#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000)
#define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000)
+#define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x8000000000000000)
#ifndef __ASSEMBLY__
@@ -452,7 +453,7 @@ enum {
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
- CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | \
+ CPU_FTR_CFAR | CPU_FTR_HVMODE | \
CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX)
#define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
@@ -461,7 +462,7 @@ enum {
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
CPU_FTR_DSCR | CPU_FTR_SAO | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
- CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
+ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP)
#define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
@@ -478,6 +479,8 @@ enum {
CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300)
#define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \
(~CPU_FTR_SAO))
+#define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
+#define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -496,7 +499,8 @@ enum {
(CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \
- CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1)
+ CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | \
+ CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1)
#endif
#else
enum {
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 9847ae3a12d1..5161c37dd039 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -93,7 +93,7 @@ struct eeh_pe {
struct pci_bus *bus; /* Top PCI bus for bus PE */
int check_count; /* Times of ignored error */
int freeze_count; /* Times of froze up */
- struct timeval tstamp; /* Time on first-time freeze */
+ time64_t tstamp; /* Time on first-time freeze */
int false_positives; /* Times of reported #ff's */
atomic_t pass_dev_cnt; /* Count of passed through devs */
struct eeh_pe *parent; /* Parent PE */
@@ -200,7 +200,6 @@ enum {
struct eeh_ops {
char *name;
int (*init)(void);
- int (*post_init)(void);
void* (*probe)(struct pci_dn *pdn, void *data);
int (*set_option)(struct eeh_pe *pe, int option);
int (*get_pe_addr)(struct eeh_pe *pe);
@@ -275,7 +274,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
struct eeh_dev *eeh_dev_init(struct pci_dn *pdn);
void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
-int eeh_init(void);
+void eeh_probe_devices(void);
int __init eeh_ops_register(struct eeh_ops *ops);
int __exit eeh_ops_unregister(const char *name);
int eeh_check_failure(const volatile void __iomem *token);
@@ -321,10 +320,7 @@ static inline bool eeh_enabled(void)
return false;
}
-static inline int eeh_init(void)
-{
- return 0;
-}
+static inline void eeh_probe_devices(void) { }
static inline void *eeh_dev_init(struct pci_dn *pdn, void *data)
{
diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h
index f00e10e2a335..651e1354498e 100644
--- a/arch/powerpc/include/asm/emulated_ops.h
+++ b/arch/powerpc/include/asm/emulated_ops.h
@@ -55,6 +55,10 @@ extern struct ppc_emulated {
struct ppc_emulated_entry mfdscr;
struct ppc_emulated_entry mtdscr;
struct ppc_emulated_entry lq_stq;
+ struct ppc_emulated_entry lxvw4x;
+ struct ppc_emulated_entry lxvh8x;
+ struct ppc_emulated_entry lxvd2x;
+ struct ppc_emulated_entry lxvb16x;
#endif
} ppc_emulated;
diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h
index 334459ad145b..90863245df53 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -508,7 +508,7 @@ static unsigned long epapr_hypercall(unsigned long *in,
static inline long epapr_hypercall0_1(unsigned int nr, unsigned long *r2)
{
- unsigned long in[8];
+ unsigned long in[8] = {0};
unsigned long out[8];
unsigned long r;
@@ -520,7 +520,7 @@ static inline long epapr_hypercall0_1(unsigned int nr, unsigned long *r2)
static inline long epapr_hypercall0(unsigned int nr)
{
- unsigned long in[8];
+ unsigned long in[8] = {0};
unsigned long out[8];
return epapr_hypercall(in, out, nr);
@@ -528,7 +528,7 @@ static inline long epapr_hypercall0(unsigned int nr)
static inline long epapr_hypercall1(unsigned int nr, unsigned long p1)
{
- unsigned long in[8];
+ unsigned long in[8] = {0};
unsigned long out[8];
in[0] = p1;
@@ -538,7 +538,7 @@ static inline long epapr_hypercall1(unsigned int nr, unsigned long p1)
static inline long epapr_hypercall2(unsigned int nr, unsigned long p1,
unsigned long p2)
{
- unsigned long in[8];
+ unsigned long in[8] = {0};
unsigned long out[8];
in[0] = p1;
@@ -549,7 +549,7 @@ static inline long epapr_hypercall2(unsigned int nr, unsigned long p1,
static inline long epapr_hypercall3(unsigned int nr, unsigned long p1,
unsigned long p2, unsigned long p3)
{
- unsigned long in[8];
+ unsigned long in[8] = {0};
unsigned long out[8];
in[0] = p1;
@@ -562,7 +562,7 @@ static inline long epapr_hypercall4(unsigned int nr, unsigned long p1,
unsigned long p2, unsigned long p3,
unsigned long p4)
{
- unsigned long in[8];
+ unsigned long in[8] = {0};
unsigned long out[8];
in[0] = p1;
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 9a318973af05..b27205297e1d 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -55,6 +55,11 @@
#endif
/*
+ * maximum recursive depth of MCE exceptions
+ */
+#define MAX_MCE_DEPTH 4
+
+/*
* EX_LR is only used in EXSLB and where it does not overlap with EX_DAR
* EX_CCR similarly with DSISR, but being 4 byte registers there is a hole
* in the save area so it's not necessary to overlap them. Could be used
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 93f98239159f..14c9d44f355b 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -41,12 +41,6 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma,
return radix__flush_hugetlb_page(vma, vmaddr);
}
-static inline void __local_flush_hugetlb_page(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
- if (radix_enabled())
- return radix__local_flush_hugetlb_page(vma, vmaddr);
-}
#else
static inline pte_t *hugepd_page(hugepd_t hpd)
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index abd04c36c251..3818fa0164f0 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -32,6 +32,7 @@
#ifndef __ASSEMBLY__
+extern void replay_system_reset(void);
extern void __replay_interrupt(unsigned int vector);
extern void timer_interrupt(struct pt_regs *);
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index 8814a7249ceb..9f3be5c8a4a3 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -103,8 +103,8 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
extern int kprobe_handler(struct pt_regs *regs);
extern int kprobe_post_handler(struct pt_regs *regs);
-extern int is_current_kprobe_addr(unsigned long addr);
#ifdef CONFIG_KPROBES_ON_FTRACE
+extern int __is_active_jprobe(unsigned long addr);
extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb);
#else
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 83596f32f50b..7cea76f11c26 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -104,10 +104,6 @@ struct kvmppc_host_state {
u8 napping;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- /*
- * hwthread_req/hwthread_state pair is used to pull sibling threads
- * out of guest on pre-ISAv3.0B CPUs where threads share MMU.
- */
u8 hwthread_req;
u8 hwthread_state;
u8 host_ipi;
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 190d69a7f701..3a1226e9b465 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -204,12 +204,10 @@ struct mce_error_info {
extern void save_mce_event(struct pt_regs *regs, long handled,
struct mce_error_info *mce_err, uint64_t nip,
- uint64_t addr);
+ uint64_t addr, uint64_t phys_addr);
extern int get_mce_event(struct machine_check_event *mce, bool release);
extern void release_mce_event(void);
extern void machine_check_queue_event(void);
extern void machine_check_print_event_info(struct machine_check_event *evt,
bool user_mode);
-extern uint64_t get_mce_fault_addr(struct machine_check_event *evt);
-
#endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 492d8140a395..6177d43f0ce8 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -78,6 +78,52 @@ extern void switch_cop(struct mm_struct *next);
extern int use_cop(unsigned long acop, struct mm_struct *mm);
extern void drop_cop(unsigned long acop, struct mm_struct *mm);
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline void inc_mm_active_cpus(struct mm_struct *mm)
+{
+ atomic_inc(&mm->context.active_cpus);
+}
+
+static inline void dec_mm_active_cpus(struct mm_struct *mm)
+{
+ atomic_dec(&mm->context.active_cpus);
+}
+
+static inline void mm_context_add_copro(struct mm_struct *mm)
+{
+ /*
+ * On hash, should only be called once over the lifetime of
+ * the context, as we can't decrement the active cpus count
+ * and flush properly for the time being.
+ */
+ inc_mm_active_cpus(mm);
+}
+
+static inline void mm_context_remove_copro(struct mm_struct *mm)
+{
+ /*
+ * Need to broadcast a global flush of the full mm before
+ * decrementing active_cpus count, as the next TLBI may be
+ * local and the nMMU and/or PSL need to be cleaned up.
+ * Should be rare enough so that it's acceptable.
+ *
+ * Skip on hash, as we don't know how to do the proper flush
+ * for the time being. Invalidations will remain global if
+ * used on hash.
+ */
+ if (radix_enabled()) {
+ flush_all_mm(mm);
+ dec_mm_active_cpus(mm);
+ }
+}
+#else
+static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
+static inline void dec_mm_active_cpus(struct mm_struct *mm) { }
+static inline void mm_context_add_copro(struct mm_struct *mm) { }
+static inline void mm_context_remove_copro(struct mm_struct *mm) { }
+#endif
+
+
extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk);
@@ -119,9 +165,13 @@ static inline void arch_dup_mmap(struct mm_struct *oldmm,
{
}
+#ifndef CONFIG_PPC_BOOK3S_64
static inline void arch_exit_mmap(struct mm_struct *mm)
{
}
+#else
+extern void arch_exit_mmap(struct mm_struct *mm);
+#endif
static inline void arch_unmap(struct mm_struct *mm,
struct vm_area_struct *vma,
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 265bbd7cba73..abddf5830ad5 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -204,7 +204,7 @@ static inline unsigned long pte_update(struct mm_struct *mm,
if (!huge)
assert_pte_locked(mm, addr);
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
if (old & _PAGE_HASHPTE)
hpte_need_flush(mm, addr, ptep, old, huge);
#endif
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 450a60b81d2a..233c7504b1f2 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -188,6 +188,7 @@
#define OPAL_XIVE_DUMP 142
#define OPAL_XIVE_RESERVED3 143
#define OPAL_XIVE_RESERVED4 144
+#define OPAL_SIGNAL_SYSTEM_RESET 145
#define OPAL_NPU_INIT_CONTEXT 146
#define OPAL_NPU_DESTROY_CONTEXT 147
#define OPAL_NPU_MAP_LPAR 148
@@ -895,6 +896,8 @@ enum {
*/
OPAL_REINIT_CPUS_MMU_HASH = (1 << 2),
OPAL_REINIT_CPUS_MMU_RADIX = (1 << 3),
+
+ OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED = (1 << 4),
};
typedef struct oppanel_line {
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 726c23304a57..0c545f7fc77b 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -281,6 +281,8 @@ int opal_get_power_shift_ratio(u32 handle, int token, u32 *psr);
int opal_set_power_shift_ratio(u32 handle, int token, u32 psr);
int opal_sensor_group_clear(u32 group_hndl, int token);
+s64 opal_signal_system_reset(s32 cpu);
+
/* Internal functions */
extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
int depth, void *data);
@@ -304,11 +306,11 @@ extern void opal_notifier_enable(void);
extern void opal_notifier_disable(void);
extern void opal_notifier_update_evt(uint64_t evt_mask, uint64_t evt_val);
-extern int __opal_async_get_token(void);
extern int opal_async_get_token_interruptible(void);
-extern int __opal_async_release_token(int token);
extern int opal_async_release_token(int token);
extern int opal_async_wait_response(uint64_t token, struct opal_msg *msg);
+extern int opal_async_wait_response_interruptible(uint64_t token,
+ struct opal_msg *msg);
extern int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data);
struct rtc_time;
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 04b60af027ae..3892db93b837 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -91,14 +91,14 @@ struct paca_struct {
u8 cpu_start; /* At startup, processor spins until */
/* this becomes non-zero. */
u8 kexec_state; /* set when kexec down has irqs off */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
struct slb_shadow *slb_shadow_ptr;
struct dtl_entry *dispatch_log;
struct dtl_entry *dispatch_log_end;
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif
u64 dscr_default; /* per-CPU default DSCR */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/*
* Now, starting in cacheline 2, the exception save areas
*/
@@ -110,7 +110,7 @@ struct paca_struct {
u16 vmalloc_sllp;
u16 slb_cache_ptr;
u32 slb_cache[SLB_CACHE_ENTRIES];
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC_BOOK3E
u64 exgen[8] __aligned(0x40);
@@ -143,7 +143,7 @@ struct paca_struct {
#ifdef CONFIG_PPC_MM_SLICES
u64 mm_ctx_low_slices_psize;
unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
- unsigned long addr_limit;
+ unsigned long mm_ctx_slb_addr_limit;
#else
u16 mm_ctx_user_psize;
u16 mm_ctx_sllp;
@@ -192,7 +192,7 @@ struct paca_struct {
struct stop_sprs stop_sprs;
#endif
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/* Non-maskable exceptions that are not performance critical */
u64 exnmi[EX_SIZE]; /* used for system reset (nmi) */
u64 exmc[EX_SIZE]; /* used for machine checks */
@@ -210,6 +210,7 @@ struct paca_struct {
*/
u16 in_mce;
u8 hmi_event_available; /* HMI event is available */
+ u8 hmi_p9_special_emu; /* HMI P9 special emulation */
#endif
/* Stuff for accurate time accounting */
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index c4d9654bd637..56234c6fcd61 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -117,21 +117,21 @@ extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
#endif /* __ASSEMBLY__ */
#else
#define slice_init()
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
#define get_slice_psize(mm, addr) ((mm)->context.user_psize)
#define slice_set_user_psize(mm, psize) \
do { \
(mm)->context.user_psize = (psize); \
(mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
} while (0)
-#else /* CONFIG_PPC_STD_MMU_64 */
+#else /* !CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC_64K_PAGES
#define get_slice_psize(mm, addr) MMU_PAGE_64K
#else /* CONFIG_PPC_64K_PAGES */
#define get_slice_psize(mm, addr) MMU_PAGE_4K
#endif /* !CONFIG_PPC_64K_PAGES */
#define slice_set_user_psize(mm, psize) do { BUG(); } while(0)
-#endif /* !CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
#define slice_set_range_psize(mm, start, len, psize) \
slice_set_user_psize((mm), (psize))
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 0b8aa1fe2d5f..62ed83db04ae 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -218,6 +218,7 @@ struct pci_dn {
#endif
struct list_head child_list;
struct list_head list;
+ struct resource holes[PCI_SRIOV_NUM_BARS];
};
/* Get the pointer to a device_node's pci_dn */
diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h
index beb6e3e79788..a89c67b62680 100644
--- a/arch/powerpc/include/asm/pgtable-be-types.h
+++ b/arch/powerpc/include/asm/pgtable-be-types.h
@@ -77,7 +77,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
* With hash config 64k pages additionally define a bigger "real PTE" type that
* gathers the "second half" part of the PTE for pseudo 64k pages
*/
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64)
typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
#else
typedef struct { pte_t pte; } real_pte_t;
diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
index cfe89a6fc308..eccb30b38b47 100644
--- a/arch/powerpc/include/asm/pgtable-types.h
+++ b/arch/powerpc/include/asm/pgtable-types.h
@@ -50,13 +50,13 @@ typedef struct { unsigned long pgprot; } pgprot_t;
* With hash config 64k pages additionally define a bigger "real PTE" type that
* gathers the "second half" part of the PTE for pseudo 64k pages
*/
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64)
typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
#else
typedef struct { pte_t pte; } real_pte_t;
#endif
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/cmpxchg.h>
static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h
index f62797702300..dc5f6a5d4575 100644
--- a/arch/powerpc/include/asm/powernv.h
+++ b/arch/powerpc/include/asm/powernv.h
@@ -22,6 +22,8 @@ extern void pnv_npu2_destroy_context(struct npu_context *context,
extern int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
unsigned long *flags, unsigned long *status,
int count);
+
+void pnv_tm_init(void);
#else
static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { }
static inline struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
@@ -36,6 +38,8 @@ static inline int pnv_npu2_handle_fault(struct npu_context *context,
unsigned long *status, int count) {
return -ENODEV;
}
+
+static inline void pnv_tm_init(void) { }
#endif
#endif /* _ASM_POWERNV_H */
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 36f3e41c9fbe..ae94b3626b6c 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -774,9 +774,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
#ifdef CONFIG_PPC_BOOK3E
#define FIXUP_ENDIAN
#else
+/*
+ * This version may be used in in HV or non-HV context.
+ * MSR[EE] must be disabled.
+ */
#define FIXUP_ENDIAN \
tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \
- b $+44; /* Skip trampoline if endian is good */ \
+ b 191f; /* Skip trampoline if endian is good */ \
.long 0xa600607d; /* mfmsr r11 */ \
.long 0x01006b69; /* xori r11,r11,1 */ \
.long 0x00004039; /* li r10,0 */ \
@@ -786,7 +790,26 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
.long 0x14004a39; /* addi r10,r10,20 */ \
.long 0xa6035a7d; /* mtsrr0 r10 */ \
.long 0xa6037b7d; /* mtsrr1 r11 */ \
- .long 0x2400004c /* rfid */
+ .long 0x2400004c; /* rfid */ \
+191:
+
+/*
+ * This version that may only be used with MSR[HV]=1
+ * - Does not clear MSR[RI], so more robust.
+ * - Slightly smaller and faster.
+ */
+#define FIXUP_ENDIAN_HV \
+ tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \
+ b 191f; /* Skip trampoline if endian is good */ \
+ .long 0xa600607d; /* mfmsr r11 */ \
+ .long 0x01006b69; /* xori r11,r11,1 */ \
+ .long 0x05009f42; /* bcl 20,31,$+4 */ \
+ .long 0xa602487d; /* mflr r10 */ \
+ .long 0x14004a39; /* addi r10,r10,20 */ \
+ .long 0xa64b5a7d; /* mthsrr0 r10 */ \
+ .long 0xa64b7b7d; /* mthsrr1 r11 */ \
+ .long 0x2402004c; /* hrfid */ \
+191:
#endif /* !CONFIG_PPC_BOOK3E */
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index fab7ff877304..bdab3b74eb98 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -329,6 +329,7 @@ struct thread_struct {
*/
int dscr_inherit;
unsigned long ppr; /* used to save/restore SMT priority */
+ unsigned long tidr;
#endif
#ifdef CONFIG_PPC_BOOK3S_64
unsigned long tar;
@@ -340,7 +341,9 @@ struct thread_struct {
unsigned long sier;
unsigned long mmcr2;
unsigned mmcr0;
+
unsigned used_ebb;
+ unsigned int used_vas;
#endif
};
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index d98ac188cedb..9b8cedf618f4 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -12,6 +12,7 @@
#define __HAVE_ARCH_MEMCMP
#define __HAVE_ARCH_MEMCHR
#define __HAVE_ARCH_MEMSET16
+#define __HAVE_ARCH_MEMCPY_FLUSHCACHE
extern char * strcpy(char *,const char *);
extern char * strncpy(char *,const char *, __kernel_size_t);
@@ -24,6 +25,7 @@ extern void * memcpy(void *,const void *,__kernel_size_t);
extern void * memmove(void *,const void *,__kernel_size_t);
extern int memcmp(const void *,const void *,__kernel_size_t);
extern void * memchr(const void *,int,__kernel_size_t);
+extern void * memcpy_flushcache(void *,const void *,__kernel_size_t);
#ifdef CONFIG_PPC64
#define __HAVE_ARCH_MEMSET32
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index bf820f53e27e..c3ca42cdc9f5 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -92,4 +92,9 @@ static inline void clear_task_ebb(struct task_struct *t)
#endif
}
+extern int set_thread_uses_vas(void);
+
+extern int set_thread_tidr(struct task_struct *t);
+extern void clear_thread_tidr(struct task_struct *t);
+
#endif /* _ASM_POWERPC_SWITCH_TO_H */
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index 13dbcd41885e..7d5a157c7832 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -77,7 +77,7 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
flush_tlb_mm(mm);
}
-#elif defined(CONFIG_PPC_STD_MMU_64)
+#elif defined(CONFIG_PPC_BOOK3S_64)
#include <asm/book3s/64/tlbflush.h>
#else
#error Unsupported MMU type
diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h
index a8bc72a7f4be..b1658c97047c 100644
--- a/arch/powerpc/include/asm/tm.h
+++ b/arch/powerpc/include/asm/tm.h
@@ -12,12 +12,13 @@
extern void tm_enable(void);
extern void tm_reclaim(struct thread_struct *thread,
- unsigned long orig_msr, uint8_t cause);
+ uint8_t cause);
extern void tm_reclaim_current(uint8_t cause);
-extern void tm_recheckpoint(struct thread_struct *thread,
- unsigned long orig_msr);
+extern void tm_recheckpoint(struct thread_struct *thread);
extern void tm_abort(uint8_t cause);
extern void tm_save_sprs(struct thread_struct *thread);
extern void tm_restore_sprs(struct thread_struct *thread);
+extern bool tm_suspend_disabled;
+
#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 023ff9f17501..88187c285c70 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -97,6 +97,14 @@ static inline int prrn_is_enabled(void)
}
#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES)
+#if defined(CONFIG_PPC_SPLPAR)
+extern int timed_topology_update(int nsecs);
+#else
+#define timed_topology_update(nsecs)
+#endif /* CONFIG_PPC_SPLPAR */
+#endif /* CONFIG_HOTPLUG_CPU || CONFIG_NEED_MULTIPLE_NODES */
+
#include <asm-generic/topology.h>
#ifdef CONFIG_SMP
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 11f4bd07cce0..51bfeb8777f0 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -174,6 +174,23 @@ do { \
extern long __get_user_bad(void);
+/*
+ * This does an atomic 128 byte aligned load from userspace.
+ * Upto caller to do enable_kernel_vmx() before calling!
+ */
+#define __get_user_atomic_128_aligned(kaddr, uaddr, err) \
+ __asm__ __volatile__( \
+ "1: lvx 0,0,%1 # get user\n" \
+ " stvx 0,0,%2 # put kernel\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: li %0,%3\n" \
+ " b 2b\n" \
+ ".previous\n" \
+ EX_TABLE(1b, 3b) \
+ : "=r" (err) \
+ : "b" (uaddr), "b" (kaddr), "i" (-EFAULT), "0" (err))
+
#define __get_user_asm(x, addr, err, op) \
__asm__ __volatile__( \
"1: "op" %1,0(%2) # get_user\n" \
@@ -340,4 +357,9 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size)
extern long strncpy_from_user(char *dst, const char __user *src, long count);
extern __must_check long strnlen_user(const char __user *str, long n);
+extern long __copy_from_user_flushcache(void *dst, const void __user *src,
+ unsigned size);
+extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
+ size_t len);
+
#endif /* _ARCH_POWERPC_UACCESS_H */
diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index fd5963acd658..771456227496 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -10,6 +10,8 @@
#ifndef _ASM_POWERPC_VAS_H
#define _ASM_POWERPC_VAS_H
+struct vas_window;
+
/*
* Min and max FIFO sizes are based on Version 1.05 Section 3.1.4.25
* (Local FIFO Size Register) of the VAS workbook.
@@ -104,6 +106,15 @@ struct vas_tx_win_attr {
};
/*
+ * Helper to map a chip id to VAS id.
+ * For POWER9, this is a 1:1 mapping. In the future this maybe a 1:N
+ * mapping in which case, we will need to update this helper.
+ *
+ * Return the VAS id or -1 if no matching vasid is found.
+ */
+int chip_to_vas_id(int chipid);
+
+/*
* Helper to initialize receive window attributes to defaults for an
* NX window.
*/
@@ -156,4 +167,14 @@ int vas_copy_crb(void *crb, int offset);
*/
int vas_paste_crb(struct vas_window *win, int offset, bool re);
+/*
+ * Return a system-wide unique id for the VAS window @win.
+ */
+extern u32 vas_win_id(struct vas_window *win);
+
+/*
+ * Return the power bus paste address associated with @win so the caller
+ * can map that address into their address space.
+ */
+extern u64 vas_win_paste_addr(struct vas_window *win);
#endif /* __ASM_POWERPC_VAS_H */
diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
index 50bcb4295de4..540592034740 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -49,6 +49,7 @@
#define PPC_FEATURE2_HAS_IEEE128 0x00400000 /* VSX IEEE Binary Float 128-bit */
#define PPC_FEATURE2_DARN 0x00200000 /* darn random number insn */
#define PPC_FEATURE2_SCV 0x00100000 /* scv syscall */
+#define PPC_FEATURE2_HTM_NO_SUSPEND 0x00080000 /* TM w/out suspended state */
/*
* IMPORTANT!
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 6c6cce937dd8..1b6bc7fba996 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -129,7 +129,7 @@ obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o
obj-$(CONFIG_PPC64) += $(obj64-y)
obj-$(CONFIG_PPC32) += $(obj32-y)
-ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE),)
+ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE)(CONFIG_PPC_BOOK3S),)
obj-y += ppc_save_regs.o
endif
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8cfb20e38cfe..9aace433491a 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -185,7 +185,7 @@ int main(void)
#ifdef CONFIG_PPC_MM_SLICES
OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize);
OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize);
- DEFINE(PACA_ADDR_LIMIT, offsetof(struct paca_struct, addr_limit));
+ OFFSET(PACA_SLB_ADDR_LIMIT, paca_struct, mm_ctx_slb_addr_limit);
DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
#endif /* CONFIG_PPC_MM_SLICES */
#endif
@@ -208,7 +208,7 @@ int main(void)
OFFSET(TCD_ESEL_FIRST, tlb_core_data, esel_first);
#endif /* CONFIG_PPC_BOOK3E */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
OFFSET(PACASLBCACHE, paca_struct, slb_cache);
OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr);
OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp);
@@ -230,7 +230,7 @@ int main(void)
OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx);
OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count);
OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx);
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
OFFSET(PACAEMERGSP, paca_struct, emergency_sp);
#ifdef CONFIG_PPC_BOOK3S_64
OFFSET(PACAMCEMERGSP, paca_struct, mc_emergency_sp);
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 760872916013..1350f49d81a8 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -547,11 +547,31 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
- { /* Power9 */
+ { /* Power9 DD2.0 */
+ .pvr_mask = 0xffffefff,
+ .pvr_value = 0x004e0200,
+ .cpu_name = "POWER9 (raw)",
+ .cpu_features = CPU_FTRS_POWER9_DD2_0,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power9",
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .flush_tlb = __flush_tlb_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
+ .platform = "power9",
+ },
+ { /* Power9 DD 2.1 or later (see DD2.0 above) */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004e0000,
.cpu_name = "POWER9 (raw)",
- .cpu_features = CPU_FTRS_POWER9,
+ .cpu_features = CPU_FTRS_POWER9_DD2_1,
.cpu_user_features = COMMON_USER_POWER9,
.cpu_user_features2 = COMMON_USER2_POWER9,
.mmu_features = MMU_FTRS_POWER9,
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 7275fed271af..602e0fde19b4 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -634,7 +634,7 @@ static struct dt_cpu_feature_match __initdata
{"no-execute", feat_enable, 0},
{"strong-access-ordering", feat_enable, CPU_FTR_SAO},
{"cache-inhibited-large-page", feat_enable_large_ci, 0},
- {"coprocessor-icswx", feat_enable, CPU_FTR_ICSWX},
+ {"coprocessor-icswx", feat_enable, 0},
{"hypervisor-virtualization-interrupt", feat_enable_hvi, 0},
{"program-priority-register", feat_enable, CPU_FTR_HAS_PPR},
{"wait", feat_enable, 0},
@@ -735,6 +735,8 @@ static __init void cpufeatures_cpu_quirks(void)
*/
if ((version & 0xffffff00) == 0x004e0100)
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
+ else if ((version & 0xffffefff) == 0x004e0200)
+ cur_cpu_spec->cpu_features &= ~CPU_FTR_POWER9_DD2_1;
}
static void __init cpufeatures_setup_finished(void)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 116000b45531..cbca0a667682 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -972,6 +972,18 @@ static struct notifier_block eeh_reboot_nb = {
.notifier_call = eeh_reboot_notifier,
};
+void eeh_probe_devices(void)
+{
+ struct pci_controller *hose, *tmp;
+ struct pci_dn *pdn;
+
+ /* Enable EEH for all adapters */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ pdn = hose->pci_data;
+ traverse_pci_dn(pdn, eeh_ops->probe, NULL);
+ }
+}
+
/**
* eeh_init - EEH initialization
*
@@ -987,22 +999,11 @@ static struct notifier_block eeh_reboot_nb = {
* Even if force-off is set, the EEH hardware is still enabled, so that
* newer systems can boot.
*/
-int eeh_init(void)
+static int eeh_init(void)
{
struct pci_controller *hose, *tmp;
- struct pci_dn *pdn;
- static int cnt = 0;
int ret = 0;
- /*
- * We have to delay the initialization on PowerNV after
- * the PCI hierarchy tree has been built because the PEs
- * are figured out based on PCI devices instead of device
- * tree nodes
- */
- if (machine_is(powernv) && cnt++ <= 0)
- return ret;
-
/* Register reboot notifier */
ret = register_reboot_notifier(&eeh_reboot_nb);
if (ret) {
@@ -1028,22 +1029,7 @@ int eeh_init(void)
if (ret)
return ret;
- /* Enable EEH for all adapters */
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
- pdn = hose->pci_data;
- traverse_pci_dn(pdn, eeh_ops->probe, NULL);
- }
-
- /*
- * Call platform post-initialization. Actually, It's good chance
- * to inform platform that EEH is ready to supply service if the
- * I/O cache stuff has been built up.
- */
- if (eeh_ops->post_init) {
- ret = eeh_ops->post_init();
- if (ret)
- return ret;
- }
+ eeh_probe_devices();
if (eeh_enabled())
pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
@@ -1757,10 +1743,6 @@ static int eeh_enable_dbgfs_set(void *data, u64 val)
else
eeh_add_flag(EEH_FORCE_DISABLED);
- /* Notify the backend */
- if (eeh_ops->post_init)
- eeh_ops->post_init();
-
return 0;
}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 4e1b433f6cb5..4f71e4c9beb7 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -623,7 +623,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
struct eeh_rmv_data *rmv_data)
{
struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
- struct timeval tstamp;
+ time64_t tstamp;
int cnt, rc;
struct eeh_dev *edev;
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 2e8d1b2b5af4..2d4956e97aa9 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -526,16 +526,16 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
*/
void eeh_pe_update_time_stamp(struct eeh_pe *pe)
{
- struct timeval tstamp;
+ time64_t tstamp;
if (!pe) return;
if (pe->freeze_count <= 0) {
pe->freeze_count = 0;
- do_gettimeofday(&pe->tstamp);
+ pe->tstamp = ktime_get_seconds();
} else {
- do_gettimeofday(&tstamp);
- if (tstamp.tv_sec - pe->tstamp.tv_sec > 3600) {
+ tstamp = ktime_get_seconds();
+ if (tstamp - pe->tstamp > 3600) {
pe->tstamp = tstamp;
pe->freeze_count = 0;
}
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 4a0fd4f40245..3320bcac7192 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -539,7 +539,7 @@ _GLOBAL(_switch)
std r6,PACACURRENT(r13) /* Set new 'current' */
ld r8,KSP(r4) /* new stack pointer */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_MMU_FTR_SECTION
b 2f
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
@@ -588,7 +588,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
slbmte r7,r0
isync
2:
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
CURRENT_THREAD_INFO(r7, r8) /* base of new stack */
/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 1c80bd292e48..e441b469dc8f 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -114,6 +114,7 @@ EXC_VIRT_NONE(0x4000, 0x100)
cmpwi cr3,r10,2 ; \
BRANCH_TO_C000(r10, system_reset_idle_common) ; \
1: \
+ KVMTEST_PR(n) ; \
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#else
#define IDLETEST NOTEST
@@ -130,6 +131,7 @@ EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
EXC_REAL_END(system_reset, 0x100, 0x100)
EXC_VIRT_NONE(0x4100, 0x100)
+TRAMP_KVM(PACA_EXNMI, 0x100)
#ifdef CONFIG_PPC_P7_NAP
EXC_COMMON_BEGIN(system_reset_idle_common)
@@ -233,7 +235,7 @@ BEGIN_FTR_SECTION
addi r10,r10,1 /* increment paca->in_mce */
sth r10,PACA_IN_MCE(r13)
/* Limit nested MCE to level 4 to avoid stack overflow */
- cmpwi r10,4
+ cmpwi r10,MAX_MCE_DEPTH
bgt 2f /* Check if we hit limit of 4 */
std r11,GPR1(r1) /* Save r1 on the stack. */
std r11,0(r1) /* make stack chain pointer */
@@ -542,7 +544,7 @@ EXC_COMMON_BEGIN(instruction_access_common)
RECONCILE_IRQ_STATE(r10, r11)
ld r12,_MSR(r1)
ld r3,_NIP(r1)
- andis. r4,r12,DSISR_BAD_FAULT_64S@h
+ andis. r4,r12,DSISR_SRR1_MATCH_64S@h
li r5,0x400
std r3,_DAR(r1)
std r4,_DSISR(r1)
@@ -606,7 +608,7 @@ EXC_COMMON_BEGIN(slb_miss_common)
cmpdi cr5,r11,MSR_RI
crset 4*cr0+eq
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_MMU_FTR_SECTION
bl slb_allocate
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
@@ -888,12 +890,6 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
#define LOAD_SYSCALL_HANDLER(reg) \
__LOAD_HANDLER(reg, system_call_common)
-#define SYSCALL_FASTENDIAN_TEST \
-BEGIN_FTR_SECTION \
- cmpdi r0,0x1ebe ; \
- beq- 1f ; \
-END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
-
/*
* After SYSCALL_KVMTEST, we reach here with PACA in r13, r13 in r9,
* and HMT_MEDIUM.
@@ -908,6 +904,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
rfid ; \
b . ; /* prevent speculative execution */
+#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
+#define SYSCALL_FASTENDIAN_TEST \
+BEGIN_FTR_SECTION \
+ cmpdi r0,0x1ebe ; \
+ beq- 1f ; \
+END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
+
#define SYSCALL_FASTENDIAN \
/* Fast LE/BE switch system call */ \
1: mfspr r12,SPRN_SRR1 ; \
@@ -916,6 +919,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
mr r13,r9 ; \
rfid ; /* return to userspace */ \
b . ; /* prevent speculative execution */
+#else
+#define SYSCALL_FASTENDIAN_TEST
+#define SYSCALL_FASTENDIAN
+#endif /* CONFIG_PPC_FAST_ENDIAN_SWITCH */
#if defined(CONFIG_RELOCATABLE)
/*
@@ -1033,6 +1040,8 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
EXCEPTION_PROLOG_COMMON_3(0xe60)
addi r3,r1,STACK_FRAME_OVERHEAD
BRANCH_LINK_TO_FAR(hmi_exception_realmode) /* Function call ABI */
+ cmpdi cr0,r3,0
+
/* Windup the stack. */
/* Move original HSRR0 and HSRR1 into the respective regs */
ld r9,_MSR(r1)
@@ -1049,10 +1058,15 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
REST_8GPRS(2, r1)
REST_GPR(10, r1)
ld r11,_CCR(r1)
+ REST_2GPRS(12, r1)
+ bne 1f
mtcr r11
REST_GPR(11, r1)
- REST_2GPRS(12, r1)
- /* restore original r1. */
+ ld r1,GPR1(r1)
+ hrfid
+
+1: mtcr r11
+ REST_GPR(11, r1)
ld r1,GPR1(r1)
/*
@@ -1065,8 +1079,9 @@ hmi_exception_after_realmode:
EXCEPTION_PROLOG_0(PACA_EXGEN)
b tramp_real_hmi_exception
-EXC_COMMON_ASYNC(hmi_exception_common, 0xe60, handle_hmi_exception)
-
+EXC_COMMON_BEGIN(hmi_exception_common)
+EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception,
+ ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON)
EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20)
EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80)
@@ -1505,8 +1520,8 @@ USE_TEXT_SECTION()
*/
.balign IFETCH_ALIGN_BYTES
do_hash_page:
- #ifdef CONFIG_PPC_STD_MMU_64
- lis r0,DSISR_BAD_FAULT_64S@h
+#ifdef CONFIG_PPC_BOOK3S_64
+ lis r0,(DSISR_BAD_FAULT_64S|DSISR_DABRMATCH)@h
ori r0,r0,DSISR_BAD_FAULT_64S@l
and. r0,r4,r0 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
@@ -1536,7 +1551,7 @@ do_hash_page:
/* Reload DSISR into r4 for the DABR check below */
ld r4,_DSISR(r1)
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
/* Here we have a page fault that hash_page can't handle. */
handle_page_fault:
@@ -1565,7 +1580,7 @@ handle_dabr_fault:
12: b ret_from_except_lite
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/* We have a page fault that hash_page could handle but HV refused
* the PTE insertion
*/
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index e1431800bfb9..04ea5c04fd24 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1270,10 +1270,15 @@ static ssize_t fadump_release_memory_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
+ int input = -1;
+
if (!fw_dump.dump_active)
return -EPERM;
- if (buf[0] == '1') {
+ if (kstrtoint(buf, 0, &input))
+ return -EINVAL;
+
+ if (input == 1) {
/*
* Take away the '/proc/vmcore'. We are releasing the dump
* memory, hence it will not be valid anymore.
@@ -1307,21 +1312,25 @@ static ssize_t fadump_register_store(struct kobject *kobj,
const char *buf, size_t count)
{
int ret = 0;
+ int input = -1;
if (!fw_dump.fadump_enabled || fdm_active)
return -EPERM;
+ if (kstrtoint(buf, 0, &input))
+ return -EINVAL;
+
mutex_lock(&fadump_mutex);
- switch (buf[0]) {
- case '0':
+ switch (input) {
+ case 0:
if (fw_dump.dump_registered == 0) {
goto unlock_out;
}
/* Un-register Firmware-assisted dump */
fadump_unregister_dump(&fdm);
break;
- case '1':
+ case 1:
if (fw_dump.dump_registered == 1) {
ret = -EEXIST;
goto unlock_out;
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 8c54166491e7..29b2fed93289 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -388,7 +388,7 @@ DataAccess:
EXCEPTION_PROLOG
mfspr r10,SPRN_DSISR
stw r10,_DSISR(r11)
- andis. r0,r10,DSISR_BAD_FAULT_32S@h
+ andis. r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h
bne 1f /* if not, try to put a PTE */
mfspr r4,SPRN_DAR /* into the hash table */
rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index ff8511d6d8ea..aa71a90f5222 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -55,12 +55,18 @@
*
* For pSeries or server processors:
* 1. The MMU is off & open firmware is running in real mode.
- * 2. The kernel is entered at __start
+ * 2. The primary CPU enters at __start.
+ * 3. If the RTAS supports "query-cpu-stopped-state", then secondary
+ * CPUs will enter as directed by "start-cpu" RTAS call, which is
+ * generic_secondary_smp_init, with PIR in r3.
+ * 4. Else the secondary CPUs will enter at secondary_hold (0x60) as
+ * directed by the "start-cpu" RTS call, with PIR in r3.
* -or- For OPAL entry:
- * 1. The MMU is off, processor in HV mode, primary CPU enters at 0
- * with device-tree in gpr3. We also get OPAL base in r8 and
- * entry in r9 for debugging purposes
- * 2. Secondary processors enter at 0x60 with PIR in gpr3
+ * 1. The MMU is off, processor in HV mode.
+ * 2. The primary CPU enters at 0 with device-tree in r3, OPAL base
+ * in r8, and entry in r9 for debugging purposes.
+ * 3. Secondary CPUs enter as directed by OPAL_START_CPU call, which
+ * is at generic_secondary_smp_init, with PIR in r3.
*
* For Book3E processors:
* 1. The MMU is on running in AS0 in a state defined in ePAPR
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 1125c9be9e06..01e1c1997893 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -112,12 +112,14 @@ power9_save_additional_sprs:
std r4, STOP_HFSCR(r13)
mfspr r3, SPRN_MMCRA
- mfspr r4, SPRN_MMCR1
+ mfspr r4, SPRN_MMCR0
std r3, STOP_MMCRA(r13)
- std r4, STOP_MMCR1(r13)
+ std r4, _MMCR0(r1)
- mfspr r3, SPRN_MMCR2
- std r3, STOP_MMCR2(r13)
+ mfspr r3, SPRN_MMCR1
+ mfspr r4, SPRN_MMCR2
+ std r3, STOP_MMCR1(r13)
+ std r4, STOP_MMCR2(r13)
blr
power9_restore_additional_sprs:
@@ -135,11 +137,14 @@ power9_restore_additional_sprs:
ld r4, STOP_MMCRA(r13)
mtspr SPRN_HFSCR, r3
mtspr SPRN_MMCRA, r4
- /* We have already restored PACA_MMCR0 */
- ld r3, STOP_MMCR1(r13)
- ld r4, STOP_MMCR2(r13)
- mtspr SPRN_MMCR1, r3
- mtspr SPRN_MMCR2, r4
+
+ ld r3, _MMCR0(r1)
+ ld r4, STOP_MMCR1(r13)
+ mtspr SPRN_MMCR0, r3
+ mtspr SPRN_MMCR1, r4
+
+ ld r3, STOP_MMCR2(r13)
+ mtspr SPRN_MMCR2, r3
blr
/*
@@ -319,20 +324,13 @@ enter_winkle:
/*
* r3 - PSSCR value corresponding to the requested stop state.
*/
+power_enter_stop:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-power_enter_stop_kvm_rm:
- /*
- * This is currently unused because POWER9 KVM does not have to
- * gather secondary threads into sibling mode, but the code is
- * here in case that function is required.
- *
- * Tell KVM we're entering idle.
- */
+ /* Tell KVM we're entering idle */
li r4,KVM_HWTHREAD_IN_IDLE
/* DO THIS IN REAL MODE! See comment above. */
stb r4,HSTATE_HWTHREAD_STATE(r13)
#endif
-power_enter_stop:
/*
* Check if we are executing the lite variant with ESL=EC=0
*/
@@ -357,13 +355,15 @@ power_enter_stop:
b pnv_wakeup_noloss
.Lhandle_esl_ec_set:
+BEGIN_FTR_SECTION
/*
- * POWER9 DD2 can incorrectly set PMAO when waking up after a
- * state-loss idle. Saving and restoring MMCR0 over idle is a
+ * POWER9 DD2.0 or earlier can incorrectly set PMAO when waking up after
+ * a state-loss idle. Saving and restoring MMCR0 over idle is a
* workaround.
*/
mfspr r4,SPRN_MMCR0
std r4,_MMCR0(r1)
+END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
/*
* Check if the requested state is a deep idle state.
@@ -496,18 +496,6 @@ pnv_powersave_wakeup_mce:
b pnv_powersave_wakeup
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-kvm_start_guest_check:
- li r0,KVM_HWTHREAD_IN_KERNEL
- stb r0,HSTATE_HWTHREAD_STATE(r13)
- /* Order setting hwthread_state vs. testing hwthread_req */
- sync
- lbz r0,HSTATE_HWTHREAD_REQ(r13)
- cmpwi r0,0
- beqlr
- b kvm_start_guest
-#endif
-
/*
* Called from reset vector for powersave wakeups.
* cr3 - set to gt if waking up with partial/complete hypervisor state loss
@@ -532,9 +520,15 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mr r3,r12
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-BEGIN_FTR_SECTION
- bl kvm_start_guest_check
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+ li r0,KVM_HWTHREAD_IN_KERNEL
+ stb r0,HSTATE_HWTHREAD_STATE(r13)
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ sync
+ lbz r0,HSTATE_HWTHREAD_REQ(r13)
+ cmpwi r0,0
+ beq 1f
+ b kvm_start_guest
+1:
#endif
/* Return SRR1 from power7_nap() */
@@ -555,15 +549,17 @@ pnv_restore_hyp_resource_arch300:
* then clear bit 60 in MMCRA to ensure the PMU starts running.
*/
blt cr3,1f
+BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
ld r1,PACAR1(r13)
+ ld r4,_MMCR0(r1)
+ mtspr SPRN_MMCR0,r4
+END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
mfspr r4,SPRN_MMCRA
ori r4,r4,(1 << (63-60))
mtspr SPRN_MMCRA,r4
xori r4,r4,(1 << (63-60))
mtspr SPRN_MMCRA,r4
- ld r4,_MMCR0(r1)
- mtspr SPRN_MMCR0,r4
1:
/*
* POWER ISA 3. Use PSSCR to determine if we
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 4e65bf82f5e0..b7a84522e652 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -143,6 +143,13 @@ notrace unsigned int __check_irq_replay(void)
*/
unsigned char happened = local_paca->irq_happened;
+ /*
+ * We are responding to the next interrupt, so interrupt-off
+ * latencies should be reset here.
+ */
+ trace_hardirqs_on();
+ trace_hardirqs_off();
+
if (happened & PACA_IRQ_HARD_DIS) {
/* Clear bit 0 which we wouldn't clear otherwise */
local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
@@ -270,6 +277,7 @@ notrace void arch_local_irq_restore(unsigned long en)
#endif /* CONFIG_TRACE_IRQFLAGS */
set_soft_enabled(0);
+ trace_hardirqs_off();
/*
* Check if anything needs to be re-emitted. We haven't
@@ -279,6 +287,7 @@ notrace void arch_local_irq_restore(unsigned long en)
replay = __check_irq_replay();
/* We can soft-enable now */
+ trace_hardirqs_on();
set_soft_enabled(1);
/*
@@ -394,11 +403,19 @@ bool prep_irq_for_idle_irqsoff(void)
/*
* Take the SRR1 wakeup reason, index into this table to find the
* appropriate irq_happened bit.
+ *
+ * Sytem reset exceptions taken in idle state also come through here,
+ * but they are NMI interrupts so do not need to wait for IRQs to be
+ * restored, and should be taken as early as practical. These are marked
+ * with 0xff in the table. The Power ISA specifies 0100b as the system
+ * reset interrupt reason.
*/
+#define IRQ_SYSTEM_RESET 0xff
+
static const u8 srr1_to_lazyirq[0x10] = {
0, 0, 0,
PACA_IRQ_DBELL,
- 0,
+ IRQ_SYSTEM_RESET,
PACA_IRQ_DBELL,
PACA_IRQ_DEC,
0,
@@ -407,15 +424,43 @@ static const u8 srr1_to_lazyirq[0x10] = {
PACA_IRQ_HMI,
0, 0, 0, 0, 0 };
+void replay_system_reset(void)
+{
+ struct pt_regs regs;
+
+ ppc_save_regs(&regs);
+ regs.trap = 0x100;
+ get_paca()->in_nmi = 1;
+ system_reset_exception(&regs);
+ get_paca()->in_nmi = 0;
+}
+EXPORT_SYMBOL_GPL(replay_system_reset);
+
void irq_set_pending_from_srr1(unsigned long srr1)
{
unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18;
+ u8 reason = srr1_to_lazyirq[idx];
+
+ /*
+ * Take the system reset now, which is immediately after registers
+ * are restored from idle. It's an NMI, so interrupts need not be
+ * re-enabled before it is taken.
+ */
+ if (unlikely(reason == IRQ_SYSTEM_RESET)) {
+ replay_system_reset();
+ return;
+ }
/*
* The 0 index (SRR1[42:45]=b0000) must always evaluate to 0,
- * so this can be called unconditionally with srr1 wake reason.
+ * so this can be called unconditionally with the SRR1 wake
+ * reason as returned by the idle code, which uses 0 to mean no
+ * interrupt.
+ *
+ * If a future CPU was to designate this as an interrupt reason,
+ * then a new index for no interrupt must be assigned.
*/
- local_paca->irq_happened |= srr1_to_lazyirq[idx];
+ local_paca->irq_happened |= reason;
}
#endif /* CONFIG_PPC_BOOK3S */
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
index 6c089d9757c9..7a1f99f1b47f 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -25,6 +25,21 @@
#include <linux/preempt.h>
#include <linux/ftrace.h>
+/*
+ * This is called from ftrace code after invoking registered handlers to
+ * disambiguate regs->nip changes done by jprobes and livepatch. We check if
+ * there is an active jprobe at the provided address (mcount location).
+ */
+int __is_active_jprobe(unsigned long addr)
+{
+ if (!preemptible()) {
+ struct kprobe *p = raw_cpu_read(current_kprobe);
+ return (p && (unsigned long)p->addr == addr) ? 1 : 0;
+ }
+
+ return 0;
+}
+
static nokprobe_inline
int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb, unsigned long orig_nip)
@@ -60,11 +75,8 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
{
struct kprobe *p;
struct kprobe_ctlblk *kcb;
- unsigned long flags;
- /* Disable irq for emulating a breakpoint and avoiding preempt */
- local_irq_save(flags);
- hard_irq_disable();
+ preempt_disable();
p = get_kprobe((kprobe_opcode_t *)nip);
if (unlikely(!p) || kprobe_disabled(p))
@@ -86,13 +98,17 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
if (!p->pre_handler || !p->pre_handler(p, regs))
__skip_singlestep(p, regs, kcb, orig_nip);
- /*
- * If pre_handler returns !0, it sets regs->nip and
- * resets current kprobe.
- */
+ else {
+ /*
+ * If pre_handler returns !0, it sets regs->nip and
+ * resets current kprobe. In this case, we should not
+ * re-enable preemption.
+ */
+ return;
+ }
}
end:
- local_irq_restore(flags);
+ preempt_enable_no_resched();
}
NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index bebc3007a793..ca5d5a081e75 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -43,12 +43,6 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
-int is_current_kprobe_addr(unsigned long addr)
-{
- struct kprobe *p = kprobe_running();
- return (p && (unsigned long)p->addr == addr) ? 1 : 0;
-}
-
bool arch_within_kprobe_blacklist(unsigned long addr)
{
return (addr >= (unsigned long)__kprobes_text_start &&
@@ -59,7 +53,7 @@ bool arch_within_kprobe_blacklist(unsigned long addr)
kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset)
{
- kprobe_opcode_t *addr;
+ kprobe_opcode_t *addr = NULL;
#ifdef PPC64_ELF_ABI_v2
/* PPC64 ABIv2 needs local entry point */
@@ -91,36 +85,29 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset)
* Also handle <module:symbol> format.
*/
char dot_name[MODULE_NAME_LEN + 1 + KSYM_NAME_LEN];
- const char *modsym;
bool dot_appended = false;
- if ((modsym = strchr(name, ':')) != NULL) {
- modsym++;
- if (*modsym != '\0' && *modsym != '.') {
- /* Convert to <module:.symbol> */
- strncpy(dot_name, name, modsym - name);
- dot_name[modsym - name] = '.';
- dot_name[modsym - name + 1] = '\0';
- strncat(dot_name, modsym,
- sizeof(dot_name) - (modsym - name) - 2);
- dot_appended = true;
- } else {
- dot_name[0] = '\0';
- strncat(dot_name, name, sizeof(dot_name) - 1);
- }
- } else if (name[0] != '.') {
- dot_name[0] = '.';
- dot_name[1] = '\0';
- strncat(dot_name, name, KSYM_NAME_LEN - 2);
+ const char *c;
+ ssize_t ret = 0;
+ int len = 0;
+
+ if ((c = strnchr(name, MODULE_NAME_LEN, ':')) != NULL) {
+ c++;
+ len = c - name;
+ memcpy(dot_name, name, len);
+ } else
+ c = name;
+
+ if (*c != '\0' && *c != '.') {
+ dot_name[len++] = '.';
dot_appended = true;
- } else {
- dot_name[0] = '\0';
- strncat(dot_name, name, KSYM_NAME_LEN - 1);
}
- addr = (kprobe_opcode_t *)kallsyms_lookup_name(dot_name);
- if (!addr && dot_appended) {
- /* Let's try the original non-dot symbol lookup */
+ ret = strscpy(dot_name + len, c, KSYM_NAME_LEN);
+ if (ret > 0)
+ addr = (kprobe_opcode_t *)kallsyms_lookup_name(dot_name);
+
+ /* Fallback to the original non-dot symbol lookup */
+ if (!addr && dot_appended)
addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
- }
#else
addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
#endif
@@ -239,7 +226,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
}
NOKPROBE_SYMBOL(arch_prepare_kretprobe);
-int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
+static int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
{
int ret;
unsigned int insn = *p->ainsn.insn;
@@ -261,9 +248,20 @@ int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
*/
printk("Can't step on instruction %x\n", insn);
BUG();
- } else if (ret == 0)
- /* This instruction can't be boosted */
- p->ainsn.boostable = -1;
+ } else {
+ /*
+ * If we haven't previously emulated this instruction, then it
+ * can't be boosted. Note it down so we don't try to do so again.
+ *
+ * If, however, we had emulated this instruction in the past,
+ * then this is just an error with the current run (for
+ * instance, exceptions due to a load/store). We return 0 so
+ * that this is now single-stepped, but continue to try
+ * emulating it in subsequent probe hits.
+ */
+ if (unlikely(p->ainsn.boostable != 1))
+ p->ainsn.boostable = -1;
+ }
return ret;
}
@@ -639,24 +637,22 @@ NOKPROBE_SYMBOL(setjmp_pre_handler);
void __used jprobe_return(void)
{
- asm volatile("trap" ::: "memory");
+ asm volatile("jprobe_return_trap:\n"
+ "trap\n"
+ ::: "memory");
}
NOKPROBE_SYMBOL(jprobe_return);
-static void __used jprobe_return_end(void)
-{
-}
-NOKPROBE_SYMBOL(jprobe_return_end);
-
int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- /*
- * FIXME - we should ideally be validating that we got here 'cos
- * of the "trap" in jprobe_return() above, before restoring the
- * saved regs...
- */
+ if (regs->nip != ppc_kallsyms_lookup_name("jprobe_return_trap")) {
+ pr_debug("longjmp_break_handler NIP (0x%lx) does not match jprobe_return_trap (0x%lx)\n",
+ regs->nip, ppc_kallsyms_lookup_name("jprobe_return_trap"));
+ return 0;
+ }
+
memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
/* It's OK to start function graph tracing again */
unpause_graph_tracing();
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 5c12e21d0d1a..49d34d7271e7 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -360,7 +360,7 @@ void default_machine_kexec(struct kimage *image)
/* NOTREACHED */
}
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/* Values we need to export to the second kernel via the device tree. */
static unsigned long htab_base;
static unsigned long htab_size;
@@ -402,4 +402,4 @@ static int __init export_htab_values(void)
return 0;
}
late_initcall(export_htab_values);
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 9b2ea7e71c06..742e4658c5dc 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -39,11 +39,21 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
+/* Queue for delayed MCE UE events. */
+static DEFINE_PER_CPU(int, mce_ue_count);
+static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
+ mce_ue_event_queue);
+
static void machine_check_process_queued_event(struct irq_work *work);
+void machine_check_ue_event(struct machine_check_event *evt);
+static void machine_process_ue_event(struct work_struct *work);
+
static struct irq_work mce_event_process_work = {
.func = machine_check_process_queued_event,
};
+DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
+
static void mce_set_error_info(struct machine_check_event *mce,
struct mce_error_info *mce_err)
{
@@ -82,7 +92,7 @@ static void mce_set_error_info(struct machine_check_event *mce,
*/
void save_mce_event(struct pt_regs *regs, long handled,
struct mce_error_info *mce_err,
- uint64_t nip, uint64_t addr)
+ uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
int index = __this_cpu_inc_return(mce_nest_count) - 1;
struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
@@ -140,6 +150,11 @@ void save_mce_event(struct pt_regs *regs, long handled,
} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
mce->u.ue_error.effective_address_provided = true;
mce->u.ue_error.effective_address = addr;
+ if (phys_addr != ULONG_MAX) {
+ mce->u.ue_error.physical_address_provided = true;
+ mce->u.ue_error.physical_address = phys_addr;
+ machine_check_ue_event(mce);
+ }
}
return;
}
@@ -193,6 +208,26 @@ void release_mce_event(void)
get_mce_event(NULL, true);
}
+
+/*
+ * Queue up the MCE event which then can be handled later.
+ */
+void machine_check_ue_event(struct machine_check_event *evt)
+{
+ int index;
+
+ index = __this_cpu_inc_return(mce_ue_count) - 1;
+ /* If queue is full, just return for now. */
+ if (index >= MAX_MC_EVT) {
+ __this_cpu_dec(mce_ue_count);
+ return;
+ }
+ memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
+
+ /* Queue work to process this event later. */
+ schedule_work(&mce_ue_event_work);
+}
+
/*
* Queue up the MCE event which then can be handled later.
*/
@@ -215,7 +250,39 @@ void machine_check_queue_event(void)
/* Queue irq work to process this event later. */
irq_work_queue(&mce_event_process_work);
}
-
+/*
+ * process pending MCE event from the mce event queue. This function will be
+ * called during syscall exit.
+ */
+static void machine_process_ue_event(struct work_struct *work)
+{
+ int index;
+ struct machine_check_event *evt;
+
+ while (__this_cpu_read(mce_ue_count) > 0) {
+ index = __this_cpu_read(mce_ue_count) - 1;
+ evt = this_cpu_ptr(&mce_ue_event_queue[index]);
+#ifdef CONFIG_MEMORY_FAILURE
+ /*
+ * This should probably queued elsewhere, but
+ * oh! well
+ */
+ if (evt->error_type == MCE_ERROR_TYPE_UE) {
+ if (evt->u.ue_error.physical_address_provided) {
+ unsigned long pfn;
+
+ pfn = evt->u.ue_error.physical_address >>
+ PAGE_SHIFT;
+ memory_failure(pfn, SIGBUS, 0);
+ } else
+ pr_warn("Failed to identify bad address from "
+ "where the uncorrectable error (UE) "
+ "was generated\n");
+ }
+#endif
+ __this_cpu_dec(mce_ue_count);
+ }
+}
/*
* process pending MCE event from the mce event queue. This function will be
* called during syscall exit.
@@ -223,6 +290,7 @@ void machine_check_queue_event(void)
static void machine_check_process_queued_event(struct irq_work *work)
{
int index;
+ struct machine_check_event *evt;
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
@@ -232,8 +300,8 @@ static void machine_check_process_queued_event(struct irq_work *work)
*/
while (__this_cpu_read(mce_queue_count) > 0) {
index = __this_cpu_read(mce_queue_count) - 1;
- machine_check_print_event_info(
- this_cpu_ptr(&mce_event_queue[index]), false);
+ evt = this_cpu_ptr(&mce_event_queue[index]);
+ machine_check_print_event_info(evt, false);
__this_cpu_dec(mce_queue_count);
}
}
@@ -340,7 +408,7 @@ void machine_check_print_event_info(struct machine_check_event *evt,
printk("%s Effective address: %016llx\n",
level, evt->u.ue_error.effective_address);
if (evt->u.ue_error.physical_address_provided)
- printk("%s Physical address: %016llx\n",
+ printk("%s Physical address: %016llx\n",
level, evt->u.ue_error.physical_address);
break;
case MCE_ERROR_TYPE_SLB:
@@ -411,45 +479,6 @@ void machine_check_print_event_info(struct machine_check_event *evt,
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
-uint64_t get_mce_fault_addr(struct machine_check_event *evt)
-{
- switch (evt->error_type) {
- case MCE_ERROR_TYPE_UE:
- if (evt->u.ue_error.effective_address_provided)
- return evt->u.ue_error.effective_address;
- break;
- case MCE_ERROR_TYPE_SLB:
- if (evt->u.slb_error.effective_address_provided)
- return evt->u.slb_error.effective_address;
- break;
- case MCE_ERROR_TYPE_ERAT:
- if (evt->u.erat_error.effective_address_provided)
- return evt->u.erat_error.effective_address;
- break;
- case MCE_ERROR_TYPE_TLB:
- if (evt->u.tlb_error.effective_address_provided)
- return evt->u.tlb_error.effective_address;
- break;
- case MCE_ERROR_TYPE_USER:
- if (evt->u.user_error.effective_address_provided)
- return evt->u.user_error.effective_address;
- break;
- case MCE_ERROR_TYPE_RA:
- if (evt->u.ra_error.effective_address_provided)
- return evt->u.ra_error.effective_address;
- break;
- case MCE_ERROR_TYPE_LINK:
- if (evt->u.link_error.effective_address_provided)
- return evt->u.link_error.effective_address;
- break;
- default:
- case MCE_ERROR_TYPE_UNKNOWN:
- break;
- }
- return 0;
-}
-EXPORT_SYMBOL(get_mce_fault_addr);
-
/*
* This function is called in real mode. Strictly no printk's please.
*
@@ -470,6 +499,34 @@ long hmi_exception_realmode(struct pt_regs *regs)
{
__this_cpu_inc(irq_stat.hmi_exceptions);
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */
+ if (pvr_version_is(PVR_POWER9)) {
+ unsigned long hmer = mfspr(SPRN_HMER);
+
+ /* Do we have the debug bit set */
+ if (hmer & PPC_BIT(17)) {
+ hmer &= ~PPC_BIT(17);
+ mtspr(SPRN_HMER, hmer);
+
+ /*
+ * Now to avoid problems with soft-disable we
+ * only do the emulation if we are coming from
+ * user space
+ */
+ if (user_mode(regs))
+ local_paca->hmi_p9_special_emu = 1;
+
+ /*
+ * Don't bother going to OPAL if that's the
+ * only relevant bit.
+ */
+ if (!(hmer & mfspr(SPRN_HMEER)))
+ return local_paca->hmi_p9_special_emu;
+ }
+ }
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
wait_for_subcore_guest_exit();
if (ppc_md.hmi_exception_early)
@@ -477,5 +534,5 @@ long hmi_exception_realmode(struct pt_regs *regs)
wait_for_tb_resync();
- return 0;
+ return 1;
}
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 72f153c6f3fa..644f7040b91c 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -27,6 +27,36 @@
#include <asm/mmu.h>
#include <asm/mce.h>
#include <asm/machdep.h>
+#include <asm/pgtable.h>
+#include <asm/pte-walk.h>
+#include <asm/sstep.h>
+#include <asm/exception-64s.h>
+
+/*
+ * Convert an address related to an mm to a PFN. NOTE: we are in real
+ * mode, we could potentially race with page table updates.
+ */
+static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
+{
+ pte_t *ptep;
+ unsigned long flags;
+ struct mm_struct *mm;
+
+ if (user_mode(regs))
+ mm = current->mm;
+ else
+ mm = &init_mm;
+
+ local_irq_save(flags);
+ if (mm == current->mm)
+ ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
+ else
+ ptep = find_init_mm_pte(addr, NULL);
+ local_irq_restore(flags);
+ if (!ptep || pte_special(*ptep))
+ return ULONG_MAX;
+ return pte_pfn(*ptep);
+}
static void flush_tlb_206(unsigned int num_sets, unsigned int action)
{
@@ -128,7 +158,7 @@ void __flush_tlb_power9(unsigned int action)
{
unsigned int num_sets;
- if (radix_enabled())
+ if (early_radix_enabled())
num_sets = POWER9_TLB_SETS_RADIX;
else
num_sets = POWER9_TLB_SETS_HASH;
@@ -138,7 +168,7 @@ void __flush_tlb_power9(unsigned int action)
/* flush SLBs and reload */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
static void flush_and_reload_slb(void)
{
struct slb_shadow *slb;
@@ -185,7 +215,7 @@ static void flush_erat(void)
static int mce_flush(int what)
{
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
if (what == MCE_FLUSH_SLB) {
flush_and_reload_slb();
return 1;
@@ -421,9 +451,45 @@ static const struct mce_derror_table mce_p9_derror_table[] = {
MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
{ 0, false, 0, 0, 0, 0 } };
+static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
+ uint64_t *phys_addr)
+{
+ /*
+ * Carefully look at the NIP to determine
+ * the instruction to analyse. Reading the NIP
+ * in real-mode is tricky and can lead to recursive
+ * faults
+ */
+ int instr;
+ unsigned long pfn, instr_addr;
+ struct instruction_op op;
+ struct pt_regs tmp = *regs;
+
+ pfn = addr_to_pfn(regs, regs->nip);
+ if (pfn != ULONG_MAX) {
+ instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK);
+ instr = *(unsigned int *)(instr_addr);
+ if (!analyse_instr(&op, &tmp, instr)) {
+ pfn = addr_to_pfn(regs, op.ea);
+ *addr = op.ea;
+ *phys_addr = (pfn << PAGE_SHIFT);
+ return 0;
+ }
+ /*
+ * analyse_instr() might fail if the instruction
+ * is not a load/store, although this is unexpected
+ * for load/store errors or if we got the NIP
+ * wrong
+ */
+ }
+ *addr = 0;
+ return -1;
+}
+
static int mce_handle_ierror(struct pt_regs *regs,
const struct mce_ierror_table table[],
- struct mce_error_info *mce_err, uint64_t *addr)
+ struct mce_error_info *mce_err, uint64_t *addr,
+ uint64_t *phys_addr)
{
uint64_t srr1 = regs->msr;
int handled = 0;
@@ -475,8 +541,22 @@ static int mce_handle_ierror(struct pt_regs *regs,
}
mce_err->severity = table[i].severity;
mce_err->initiator = table[i].initiator;
- if (table[i].nip_valid)
+ if (table[i].nip_valid) {
*addr = regs->nip;
+ if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
+ table[i].error_type == MCE_ERROR_TYPE_UE) {
+ unsigned long pfn;
+
+ if (get_paca()->in_mce < MAX_MCE_DEPTH) {
+ pfn = addr_to_pfn(regs, regs->nip);
+ if (pfn != ULONG_MAX) {
+ *phys_addr =
+ (pfn << PAGE_SHIFT);
+ handled = 1;
+ }
+ }
+ }
+ }
return handled;
}
@@ -489,7 +569,8 @@ static int mce_handle_ierror(struct pt_regs *regs,
static int mce_handle_derror(struct pt_regs *regs,
const struct mce_derror_table table[],
- struct mce_error_info *mce_err, uint64_t *addr)
+ struct mce_error_info *mce_err, uint64_t *addr,
+ uint64_t *phys_addr)
{
uint64_t dsisr = regs->dsisr;
int handled = 0;
@@ -555,7 +636,17 @@ static int mce_handle_derror(struct pt_regs *regs,
mce_err->initiator = table[i].initiator;
if (table[i].dar_valid)
*addr = regs->dar;
-
+ else if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
+ table[i].error_type == MCE_ERROR_TYPE_UE) {
+ /*
+ * We do a maximum of 4 nested MCE calls, see
+ * kernel/exception-64s.h
+ */
+ if (get_paca()->in_mce < MAX_MCE_DEPTH)
+ if (!mce_find_instr_ea_and_pfn(regs, addr,
+ phys_addr))
+ handled = 1;
+ }
found = 1;
}
@@ -592,19 +683,21 @@ static long mce_handle_error(struct pt_regs *regs,
const struct mce_ierror_table itable[])
{
struct mce_error_info mce_err = { 0 };
- uint64_t addr;
+ uint64_t addr, phys_addr;
uint64_t srr1 = regs->msr;
long handled;
if (SRR1_MC_LOADSTORE(srr1))
- handled = mce_handle_derror(regs, dtable, &mce_err, &addr);
+ handled = mce_handle_derror(regs, dtable, &mce_err, &addr,
+ &phys_addr);
else
- handled = mce_handle_ierror(regs, itable, &mce_err, &addr);
+ handled = mce_handle_ierror(regs, itable, &mce_err, &addr,
+ &phys_addr);
if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
handled = mce_handle_ue_error(regs);
- save_mce_event(regs, handled, &mce_err, regs->nip, addr);
+ save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
return handled;
}
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 0b0f89685b67..759104b99f9f 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -429,7 +429,8 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs,
/* Find this stub, or if that fails, the next avail. entry */
stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr;
for (i = 0; stub_func_addr(stubs[i].funcdata); i++) {
- BUG_ON(i >= num_stubs);
+ if (WARN_ON(i >= num_stubs))
+ return 0;
if (stub_func_addr(stubs[i].funcdata) == func_addr(addr))
return (unsigned long)&stubs[i];
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 91e037ab20a1..8237884ca389 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -115,32 +115,23 @@ static unsigned long can_optimize(struct kprobe *p)
static void optimized_callback(struct optimized_kprobe *op,
struct pt_regs *regs)
{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- unsigned long flags;
-
/* This is possible if op is under delayed unoptimizing */
if (kprobe_disabled(&op->kp))
return;
- local_irq_save(flags);
- hard_irq_disable();
+ preempt_disable();
if (kprobe_running()) {
kprobes_inc_nmissed_count(&op->kp);
} else {
__this_cpu_write(current_kprobe, &op->kp);
regs->nip = (unsigned long)op->kp.addr;
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
opt_pre_handler(&op->kp, regs);
__this_cpu_write(current_kprobe, NULL);
}
- /*
- * No need for an explicit __hard_irq_enable() here.
- * local_irq_restore() will re-enable interrupts,
- * if they were hard disabled.
- */
- local_irq_restore(flags);
+ preempt_enable_no_resched();
}
NOKPROBE_SYMBOL(optimized_callback);
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 2ff2b8a19f71..d6597038931d 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -90,7 +90,7 @@ static inline void free_lppacas(void) { }
#endif /* CONFIG_PPC_BOOK3S */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/*
* 3 persistent SLBs are registered here. The buffer will be zero
@@ -135,11 +135,11 @@ static struct slb_shadow * __init init_slb_shadow(int cpu)
return s;
}
-#else /* CONFIG_PPC_STD_MMU_64 */
+#else /* !CONFIG_PPC_BOOK3S_64 */
static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
/* The Paca is an array with one entry per processor. Each contains an
* lppaca, which contains the information shared between the
@@ -170,9 +170,9 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
new_paca->kexec_state = KEXEC_STATE_NONE;
new_paca->__current = &init_task;
new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
new_paca->slb_shadow_ptr = init_slb_shadow(cpu);
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif
#ifdef CONFIG_PPC_BOOK3E
/* For now -- if we have threads this will be adjusted later */
@@ -262,8 +262,8 @@ void copy_mm_to_paca(struct mm_struct *mm)
get_paca()->mm_ctx_id = context->id;
#ifdef CONFIG_PPC_MM_SLICES
- VM_BUG_ON(!mm->context.addr_limit);
- get_paca()->addr_limit = mm->context.addr_limit;
+ VM_BUG_ON(!mm->context.slb_addr_limit);
+ get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
memcpy(&get_paca()->mm_ctx_high_slices_psize,
&context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
@@ -271,7 +271,7 @@ void copy_mm_to_paca(struct mm_struct *mm)
get_paca()->mm_ctx_user_psize = context->user_psize;
get_paca()->mm_ctx_sllp = context->sllp;
#endif
-#else /* CONFIG_PPC_BOOK3S */
+#else /* !CONFIG_PPC_BOOK3S */
return;
#endif
}
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 932b9741aa8f..15ce0306b092 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -90,14 +90,14 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
* to do an appropriate TLB flush here too
*/
if (bus->self) {
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
struct resource *res = bus->resource[0];
#endif
pr_debug("IO unmapping for PCI-PCI bridge %s\n",
pci_name(bus->self));
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
__flush_hash_table_range(&init_mm, res->start + _IO_BASE,
res->end + _IO_BASE + 1);
#endif
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index a0c74bbf3454..bfdd783e3916 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -77,6 +77,13 @@
extern unsigned long _get_SP(void);
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Are we running in "Suspend disabled" mode? If so we have to block any
+ * sigreturn that would get us into suspended state, and we also warn in some
+ * other paths that we should never reach with suspend disabled.
+ */
+bool tm_suspend_disabled __ro_after_init = false;
+
static void check_if_tm_restore_required(struct task_struct *tsk)
{
/*
@@ -97,9 +104,23 @@ static inline bool msr_tm_active(unsigned long msr)
{
return MSR_TM_ACTIVE(msr);
}
+
+static bool tm_active_with_fp(struct task_struct *tsk)
+{
+ return msr_tm_active(tsk->thread.regs->msr) &&
+ (tsk->thread.ckpt_regs.msr & MSR_FP);
+}
+
+static bool tm_active_with_altivec(struct task_struct *tsk)
+{
+ return msr_tm_active(tsk->thread.regs->msr) &&
+ (tsk->thread.ckpt_regs.msr & MSR_VEC);
+}
#else
static inline bool msr_tm_active(unsigned long msr) { return false; }
static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
+static inline bool tm_active_with_fp(struct task_struct *tsk) { return false; }
+static inline bool tm_active_with_altivec(struct task_struct *tsk) { return false; }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
bool strict_msr_control;
@@ -232,7 +253,7 @@ EXPORT_SYMBOL(enable_kernel_fp);
static int restore_fp(struct task_struct *tsk)
{
- if (tsk->thread.load_fp || msr_tm_active(tsk->thread.regs->msr)) {
+ if (tsk->thread.load_fp || tm_active_with_fp(tsk)) {
load_fp_state(&current->thread.fp_state);
current->thread.load_fp++;
return 1;
@@ -314,7 +335,7 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
static int restore_altivec(struct task_struct *tsk)
{
if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
- (tsk->thread.load_vec || msr_tm_active(tsk->thread.regs->msr))) {
+ (tsk->thread.load_vec || tm_active_with_altivec(tsk))) {
load_vr_state(&tsk->thread.vr_state);
tsk->thread.used_vr = 1;
tsk->thread.load_vec++;
@@ -853,6 +874,10 @@ static void tm_reclaim_thread(struct thread_struct *thr,
if (!MSR_TM_SUSPENDED(mfmsr()))
return;
+ giveup_all(container_of(thr, struct task_struct, thread));
+
+ tm_reclaim(thr, cause);
+
/*
* If we are in a transaction and FP is off then we can't have
* used FP inside that transaction. Hence the checkpointed
@@ -871,10 +896,6 @@ static void tm_reclaim_thread(struct thread_struct *thr,
if ((thr->ckpt_regs.msr & MSR_VEC) == 0)
memcpy(&thr->ckvr_state, &thr->vr_state,
sizeof(struct thread_vr_state));
-
- giveup_all(container_of(thr, struct task_struct, thread));
-
- tm_reclaim(thr, thr->ckpt_regs.msr, cause);
}
void tm_reclaim_current(uint8_t cause)
@@ -903,6 +924,8 @@ static inline void tm_reclaim_task(struct task_struct *tsk)
if (!MSR_TM_ACTIVE(thr->regs->msr))
goto out_and_saveregs;
+ WARN_ON(tm_suspend_disabled);
+
TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, "
"ccr=%lx, msr=%lx, trap=%lx)\n",
tsk->pid, thr->regs->nip,
@@ -923,11 +946,9 @@ out_and_saveregs:
tm_save_sprs(thr);
}
-extern void __tm_recheckpoint(struct thread_struct *thread,
- unsigned long orig_msr);
+extern void __tm_recheckpoint(struct thread_struct *thread);
-void tm_recheckpoint(struct thread_struct *thread,
- unsigned long orig_msr)
+void tm_recheckpoint(struct thread_struct *thread)
{
unsigned long flags;
@@ -946,15 +967,13 @@ void tm_recheckpoint(struct thread_struct *thread,
*/
tm_restore_sprs(thread);
- __tm_recheckpoint(thread, orig_msr);
+ __tm_recheckpoint(thread);
local_irq_restore(flags);
}
static inline void tm_recheckpoint_new_task(struct task_struct *new)
{
- unsigned long msr;
-
if (!cpu_has_feature(CPU_FTR_TM))
return;
@@ -973,13 +992,11 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new)
tm_restore_sprs(&new->thread);
return;
}
- msr = new->thread.ckpt_regs.msr;
/* Recheckpoint to restore original checkpointed register state. */
- TM_DEBUG("*** tm_recheckpoint of pid %d "
- "(new->msr 0x%lx, new->origmsr 0x%lx)\n",
- new->pid, new->thread.regs->msr, msr);
+ TM_DEBUG("*** tm_recheckpoint of pid %d (new->msr 0x%lx)\n",
+ new->pid, new->thread.regs->msr);
- tm_recheckpoint(&new->thread, msr);
+ tm_recheckpoint(&new->thread);
/*
* The checkpointed state has been restored but the live state has
@@ -1119,6 +1136,10 @@ static inline void restore_sprs(struct thread_struct *old_thread,
if (old_thread->tar != new_thread->tar)
mtspr(SPRN_TAR, new_thread->tar);
}
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+ old_thread->tidr != new_thread->tidr)
+ mtspr(SPRN_TIDR, new_thread->tidr);
#endif
}
@@ -1155,7 +1176,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
}
#endif /* CONFIG_PPC64 */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
batch = this_cpu_ptr(&ppc64_tlb_batch);
if (batch->active) {
current_thread_info()->local_flags |= _TLF_LAZY_MMU;
@@ -1163,7 +1184,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
__flush_tlb_pending(batch);
batch->active = 0;
}
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
switch_booke_debug_regs(&new->thread.debug);
@@ -1209,7 +1230,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
last = _switch(old_thread, new_thread);
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
batch = this_cpu_ptr(&ppc64_tlb_batch);
@@ -1223,22 +1244,22 @@ struct task_struct *__switch_to(struct task_struct *prev,
* The copy-paste buffer can only store into foreign real
* addresses, so unprivileged processes can not see the
* data or use it in any way unless they have foreign real
- * mappings. We don't have a VAS driver that allocates those
- * yet, so no cpabort is required.
+ * mappings. If the new process has the foreign real address
+ * mappings, we must issue a cp_abort to clear any state and
+ * prevent snooping, corruption or a covert channel.
+ *
+ * DD1 allows paste into normal system memory so we do an
+ * unpaired copy, rather than cp_abort, to clear the buffer,
+ * since cp_abort is quite expensive.
*/
- if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
- /*
- * DD1 allows paste into normal system memory, so we
- * do an unpaired copy here to clear the buffer and
- * prevent a covert channel being set up.
- *
- * cpabort is not used because it is quite expensive.
- */
+ if (current_thread_info()->task->thread.used_vas) {
+ asm volatile(PPC_CP_ABORT);
+ } else if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
asm volatile(PPC_COPY(%0, %1)
: : "r"(dummy_copy_buffer), "r"(0));
}
}
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
return last;
}
@@ -1434,6 +1455,137 @@ void flush_thread(void)
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
}
+int set_thread_uses_vas(void)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -EINVAL;
+
+ current->thread.used_vas = 1;
+
+ /*
+ * Even a process that has no foreign real address mapping can use
+ * an unpaired COPY instruction (to no real effect). Issue CP_ABORT
+ * to clear any pending COPY and prevent a covert channel.
+ *
+ * __switch_to() will issue CP_ABORT on future context switches.
+ */
+ asm volatile(PPC_CP_ABORT);
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+ return 0;
+}
+
+#ifdef CONFIG_PPC64
+static DEFINE_SPINLOCK(vas_thread_id_lock);
+static DEFINE_IDA(vas_thread_ida);
+
+/*
+ * We need to assign a unique thread id to each thread in a process.
+ *
+ * This thread id, referred to as TIDR, and separate from the Linux's tgid,
+ * is intended to be used to direct an ASB_Notify from the hardware to the
+ * thread, when a suitable event occurs in the system.
+ *
+ * One such event is a "paste" instruction in the context of Fast Thread
+ * Wakeup (aka Core-to-core wake up in the Virtual Accelerator Switchboard
+ * (VAS) in POWER9.
+ *
+ * To get a unique TIDR per process we could simply reuse task_pid_nr() but
+ * the problem is that task_pid_nr() is not yet available copy_thread() is
+ * called. Fixing that would require changing more intrusive arch-neutral
+ * code in code path in copy_process()?.
+ *
+ * Further, to assign unique TIDRs within each process, we need an atomic
+ * field (or an IDR) in task_struct, which again intrudes into the arch-
+ * neutral code. So try to assign globally unique TIDRs for now.
+ *
+ * NOTE: TIDR 0 indicates that the thread does not need a TIDR value.
+ * For now, only threads that expect to be notified by the VAS
+ * hardware need a TIDR value and we assign values > 0 for those.
+ */
+#define MAX_THREAD_CONTEXT ((1 << 16) - 1)
+static int assign_thread_tidr(void)
+{
+ int index;
+ int err;
+
+again:
+ if (!ida_pre_get(&vas_thread_ida, GFP_KERNEL))
+ return -ENOMEM;
+
+ spin_lock(&vas_thread_id_lock);
+ err = ida_get_new_above(&vas_thread_ida, 1, &index);
+ spin_unlock(&vas_thread_id_lock);
+
+ if (err == -EAGAIN)
+ goto again;
+ else if (err)
+ return err;
+
+ if (index > MAX_THREAD_CONTEXT) {
+ spin_lock(&vas_thread_id_lock);
+ ida_remove(&vas_thread_ida, index);
+ spin_unlock(&vas_thread_id_lock);
+ return -ENOMEM;
+ }
+
+ return index;
+}
+
+static void free_thread_tidr(int id)
+{
+ spin_lock(&vas_thread_id_lock);
+ ida_remove(&vas_thread_ida, id);
+ spin_unlock(&vas_thread_id_lock);
+}
+
+/*
+ * Clear any TIDR value assigned to this thread.
+ */
+void clear_thread_tidr(struct task_struct *t)
+{
+ if (!t->thread.tidr)
+ return;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ mtspr(SPRN_TIDR, 0);
+ free_thread_tidr(t->thread.tidr);
+ t->thread.tidr = 0;
+}
+
+void arch_release_task_struct(struct task_struct *t)
+{
+ clear_thread_tidr(t);
+}
+
+/*
+ * Assign a unique TIDR (thread id) for task @t and set it in the thread
+ * structure. For now, we only support setting TIDR for 'current' task.
+ */
+int set_thread_tidr(struct task_struct *t)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -EINVAL;
+
+ if (t != current)
+ return -EINVAL;
+
+ t->thread.tidr = assign_thread_tidr();
+ if (t->thread.tidr < 0)
+ return t->thread.tidr;
+
+ mtspr(SPRN_TIDR, t->thread.tidr);
+
+ return 0;
+}
+
+#endif /* CONFIG_PPC64 */
+
void
release_thread(struct task_struct *t)
{
@@ -1467,7 +1619,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
{
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
unsigned long sp_vsid;
unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
@@ -1580,6 +1732,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
}
if (cpu_has_feature(CPU_FTR_HAS_PPR))
p->thread.ppr = INIT_PPR;
+
+ p->thread.tidr = 0;
#endif
kregs->nip = ppc_function_entry(f);
return 0;
@@ -1898,7 +2052,8 @@ unsigned long get_wchan(struct task_struct *p)
do {
sp = *(unsigned long *)sp;
- if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
+ if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) ||
+ p->state == TASK_RUNNING)
return 0;
if (count > 0) {
ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE];
@@ -2046,7 +2201,7 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
unsigned long base = mm->brk;
unsigned long ret;
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/*
* If we are using 1TB segments and we are allowed to randomise
* the heap, we can put it above 1TB so it is backed by a 1TB
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index f83056297441..b15bae265c90 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -47,6 +47,7 @@
#include <asm/mmu.h>
#include <asm/paca.h>
#include <asm/pgtable.h>
+#include <asm/powernv.h>
#include <asm/iommu.h>
#include <asm/btext.h>
#include <asm/sections.h>
@@ -228,7 +229,7 @@ static void __init check_cpu_pa_features(unsigned long node)
ibm_pa_features, ARRAY_SIZE(ibm_pa_features));
}
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
static void __init init_mmu_slb_size(unsigned long node)
{
const __be32 *slb_size_ptr;
@@ -658,6 +659,38 @@ static void __init early_reserve_mem(void)
#endif
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static bool tm_disabled __initdata;
+
+static int __init parse_ppc_tm(char *str)
+{
+ bool res;
+
+ if (kstrtobool(str, &res))
+ return -EINVAL;
+
+ tm_disabled = !res;
+
+ return 0;
+}
+early_param("ppc_tm", parse_ppc_tm);
+
+static void __init tm_init(void)
+{
+ if (tm_disabled) {
+ pr_info("Disabling hardware transactional memory (HTM)\n");
+ cur_cpu_spec->cpu_user_features2 &=
+ ~(PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM);
+ cur_cpu_spec->cpu_features &= ~CPU_FTR_TM;
+ return;
+ }
+
+ pnv_tm_init();
+}
+#else
+static void tm_init(void) { }
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
void __init early_init_devtree(void *params)
{
phys_addr_t limit;
@@ -767,6 +800,8 @@ void __init early_init_devtree(void *params)
powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE;
#endif
+ tm_init();
+
DBG(" <- early_init_devtree()\n");
}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 2e3bc16d02b2..2075322cd225 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -773,7 +773,7 @@ void arch_setup_pdev_archdata(struct platform_device *pdev)
static __init void print_system_info(void)
{
pr_info("-----------------------------------------------------\n");
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
#endif
#ifdef CONFIG_PPC_STD_MMU_32
@@ -800,7 +800,7 @@ static __init void print_system_info(void)
pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
#endif
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
if (htab_address)
pr_info("htab_address = 0x%p\n", htab_address);
if (htab_hash_mask)
@@ -898,7 +898,8 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_PPC_MM_SLICES
#ifdef CONFIG_PPC64
- init_mm.context.addr_limit = DEFAULT_MAP_WINDOW_USER64;
+ if (!radix_enabled())
+ init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
#else
#error "context.addr_limit not initialized."
#endif
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index cfba134b3024..21c18071d9d5 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -45,6 +45,12 @@ void emergency_stack_init(void);
static inline void emergency_stack_init(void) { };
#endif
+#ifdef CONFIG_PPC64
+void record_spr_defaults(void);
+#else
+static inline void record_spr_defaults(void) { };
+#endif
+
/*
* Having this in kvm_ppc.h makes include dependencies too
* tricky to solve for setup-common.c so have it here.
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index b89c6aac48c9..8956a9856604 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -69,6 +69,8 @@
#include <asm/opal.h>
#include <asm/cputhreads.h>
+#include "setup.h"
+
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
#else
@@ -317,6 +319,13 @@ void __init early_setup(unsigned long dt_ptr)
early_init_mmu();
/*
+ * After firmware and early platform setup code has set things up,
+ * we note the SPR values for configurable control/performance
+ * registers, and use those as initial defaults.
+ */
+ record_spr_defaults();
+
+ /*
* At this point, we can let interrupts switch to virtual mode
* (the MMU has been setup), so adjust the MSR in the PACA to
* have IR and DR set and enable AIL if it exists
@@ -360,8 +369,16 @@ void early_setup_secondary(void)
#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
static bool use_spinloop(void)
{
- if (!IS_ENABLED(CONFIG_PPC_BOOK3E))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S)) {
+ /*
+ * See comments in head_64.S -- not all platforms insert
+ * secondaries at __secondary_hold and wait at the spin
+ * loop.
+ */
+ if (firmware_has_feature(FW_FEATURE_OPAL))
+ return false;
return true;
+ }
/*
* When book3e boots from kexec, the ePAPR spin table does
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index e9436c5e1e09..3d7539b90010 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -103,7 +103,7 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
static void do_signal(struct task_struct *tsk)
{
sigset_t *oldset = sigmask_to_save();
- struct ksignal ksig;
+ struct ksignal ksig = { .sig = 0 };
int ret;
int is32 = is_32bit_task();
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 92fb1c8dbbd8..16d16583cf11 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -519,6 +519,8 @@ static int save_tm_user_regs(struct pt_regs *regs,
{
unsigned long msr = regs->msr;
+ WARN_ON(tm_suspend_disabled);
+
/* Remove TM bits from thread's MSR. The MSR in the sigcontext
* just indicates to userland that we were doing a transaction, but we
* don't want to return in transactional state. This also ensures
@@ -769,6 +771,8 @@ static long restore_tm_user_regs(struct pt_regs *regs,
int i;
#endif
+ if (tm_suspend_disabled)
+ return 1;
/*
* restore general registers but not including MSR or SOFTE. Also
* take care of keeping r2 (TLS) intact if not a signal.
@@ -876,7 +880,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
/* Make sure the transaction is marked as failed */
current->thread.tm_texasr |= TEXASR_FS;
/* This loads the checkpointed FP/VEC state, if used */
- tm_recheckpoint(&current->thread, msr);
+ tm_recheckpoint(&current->thread);
/* This loads the speculative FP/VEC state, if used */
msr_check_and_set(msr & (MSR_FP | MSR_VEC));
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index b2c002993d78..4b9ca3570344 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -214,6 +214,8 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
BUG_ON(!MSR_TM_ACTIVE(regs->msr));
+ WARN_ON(tm_suspend_disabled);
+
/* Remove TM bits from thread's MSR. The MSR in the sigcontext
* just indicates to userland that we were doing a transaction, but we
* don't want to return in transactional state. This also ensures
@@ -430,6 +432,9 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
BUG_ON(tsk != current);
+ if (tm_suspend_disabled)
+ return -EINVAL;
+
/* copy the GPRs */
err |= __copy_from_user(regs->gpr, tm_sc->gp_regs, sizeof(regs->gpr));
err |= __copy_from_user(&tsk->thread.ckpt_regs, sc->gp_regs,
@@ -558,7 +563,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
/* Make sure the transaction is marked as failed */
tsk->thread.tm_texasr |= TEXASR_FS;
/* This loads the checkpointed FP/VEC state, if used */
- tm_recheckpoint(&tsk->thread, msr);
+ tm_recheckpoint(&tsk->thread);
msr_check_and_set(msr & (MSR_FP | MSR_VEC));
if (msr & MSR_FP) {
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 4437c70c7c2b..b8d4a1dac39f 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -590,6 +590,17 @@ static void sysfs_create_dscr_default(void)
if (cpu_has_feature(CPU_FTR_DSCR))
err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
}
+
+void __init record_spr_defaults(void)
+{
+ int cpu;
+
+ if (cpu_has_feature(CPU_FTR_DSCR)) {
+ dscr_default = mfspr(SPRN_DSCR);
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+ paca[cpu].dscr_default = dscr_default;
+ }
+}
#endif /* CONFIG_PPC64 */
#ifdef HAS_PPC_PMC_PA6T
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index a3374e8a258c..e3c5f75d137c 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -230,8 +230,7 @@ int __init TAU_init(void)
/* first, set up the window shrinking timer */
- init_timer(&tau_timer);
- tau_timer.function = tau_timeout_smp;
+ setup_timer(&tau_timer, tau_timeout_smp, 0UL);
tau_timer.expires = jiffies + shrink_timer;
add_timer(&tau_timer);
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 1da12f521cb7..b92ac8e711db 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -80,15 +80,12 @@ _GLOBAL(tm_abort)
blr
/* void tm_reclaim(struct thread_struct *thread,
- * unsigned long orig_msr,
* uint8_t cause)
*
* - Performs a full reclaim. This destroys outstanding
* transactions and updates thread->regs.tm_ckpt_* with the
* original checkpointed state. Note that thread->regs is
* unchanged.
- * - FP regs are written back to thread->transact_fpr before
- * reclaiming. These are the transactional (current) versions.
*
* Purpose is to both abort transactions of, and preserve the state of,
* a transactions at a context switch. We preserve/restore both sets of process
@@ -99,9 +96,9 @@ _GLOBAL(tm_abort)
* Call with IRQs off, stacks get all out of sync for some periods in here!
*/
_GLOBAL(tm_reclaim)
- mfcr r6
+ mfcr r5
mflr r0
- stw r6, 8(r1)
+ stw r5, 8(r1)
std r0, 16(r1)
std r2, STK_GOT(r1)
stdu r1, -TM_FRAME_SIZE(r1)
@@ -109,7 +106,6 @@ _GLOBAL(tm_reclaim)
/* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */
std r3, STK_PARAM(R3)(r1)
- std r4, STK_PARAM(R4)(r1)
SAVE_NVGPRS(r1)
/* We need to setup MSR for VSX register save instructions. */
@@ -139,8 +135,8 @@ _GLOBAL(tm_reclaim)
std r1, PACAR1(r13)
/* Clear MSR RI since we are about to change r1, EE is already off. */
- li r4, 0
- mtmsrd r4, 1
+ li r5, 0
+ mtmsrd r5, 1
/*
* BE CAREFUL HERE:
@@ -152,7 +148,7 @@ _GLOBAL(tm_reclaim)
* to user register state. (FPRs, CCR etc. also!)
* Use an sprg and a tm_scratch in the PACA to shuffle.
*/
- TRECLAIM(R5) /* Cause in r5 */
+ TRECLAIM(R4) /* Cause in r4 */
/* ******************** GPRs ******************** */
/* Stash the checkpointed r13 away in the scratch SPR and get the real
@@ -243,40 +239,30 @@ _GLOBAL(tm_reclaim)
/* ******************** FPR/VR/VSRs ************
- * After reclaiming, capture the checkpointed FPRs/VRs /if used/.
- *
- * (If VSX used, FP and VMX are implied. Or, we don't need to look
- * at MSR.VSX as copying FP regs if .FP, vector regs if .VMX covers it.)
- *
- * We're passed the thread's MSR as the second parameter
+ * After reclaiming, capture the checkpointed FPRs/VRs.
*
* We enabled VEC/FP/VSX in the msr above, so we can execute these
* instructions!
*/
- ld r4, STK_PARAM(R4)(r1) /* Second parameter, MSR * */
mr r3, r12
- andis. r0, r4, MSR_VEC@h
- beq dont_backup_vec
+ /* Altivec (VEC/VMX/VR)*/
addi r7, r3, THREAD_CKVRSTATE
SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */
mfvscr v0
li r6, VRSTATE_VSCR
stvx v0, r7, r6
-dont_backup_vec:
+
+ /* VRSAVE */
mfspr r0, SPRN_VRSAVE
std r0, THREAD_CKVRSAVE(r3)
- andi. r0, r4, MSR_FP
- beq dont_backup_fp
-
+ /* Floating Point (FP) */
addi r7, r3, THREAD_CKFPSTATE
SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 transact fp state */
-
mffs fr0
stfd fr0,FPSTATE_FPSCR(r7)
-dont_backup_fp:
/* TM regs, incl TEXASR -- these live in thread_struct. Note they've
* been updated by the treclaim, to explain to userland the failure
@@ -344,22 +330,19 @@ _GLOBAL(__tm_recheckpoint)
*/
subi r7, r7, STACK_FRAME_OVERHEAD
+ /* We need to setup MSR for FP/VMX/VSX register save instructions. */
mfmsr r6
- /* R4 = original MSR to indicate whether thread used FP/Vector etc. */
-
- /* Enable FP/vec in MSR if necessary! */
- lis r5, MSR_VEC@h
+ mr r5, r6
ori r5, r5, MSR_FP
- and. r5, r4, r5
- beq restore_gprs /* if neither, skip both */
-
+#ifdef CONFIG_ALTIVEC
+ oris r5, r5, MSR_VEC@h
+#endif
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
- oris r5, r5, MSR_VSX@h
+ oris r5,r5, MSR_VSX@h
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- or r5, r6, r5 /* Set MSR.FP+.VSX/.VEC */
- mtmsr r5
+ mtmsrd r5
#ifdef CONFIG_ALTIVEC
/*
@@ -368,28 +351,20 @@ _GLOBAL(__tm_recheckpoint)
* thread.fp_state[] version holds the 'live' (transactional)
* and will be loaded subsequently by any FPUnavailable trap.
*/
- andis. r0, r4, MSR_VEC@h
- beq dont_restore_vec
-
addi r8, r3, THREAD_CKVRSTATE
li r5, VRSTATE_VSCR
lvx v0, r8, r5
mtvscr v0
REST_32VRS(0, r5, r8) /* r5 scratch, r8 ptr */
-dont_restore_vec:
ld r5, THREAD_CKVRSAVE(r3)
mtspr SPRN_VRSAVE, r5
#endif
- andi. r0, r4, MSR_FP
- beq dont_restore_fp
-
addi r8, r3, THREAD_CKFPSTATE
lfd fr0, FPSTATE_FPSCR(r8)
MTFSF_L(fr0)
REST_32FPRS_VSRS(0, R4, R8)
-dont_restore_fp:
mtmsr r6 /* FP/Vec off again! */
restore_gprs:
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index b4e2b7165f79..3f3e81852422 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -110,9 +110,9 @@ ftrace_call:
/* NIP has not been altered, skip over further checks */
beq 1f
- /* Check if there is an active kprobe on us */
+ /* Check if there is an active jprobe on us */
subi r3, r14, 4
- bl is_current_kprobe_addr
+ bl __is_active_jprobe
nop
/*
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 13c9dcdcba69..f3eb61be0d30 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -37,6 +37,7 @@
#include <linux/kdebug.h>
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
+#include <linux/smp.h>
#include <asm/emulated_ops.h>
#include <asm/pgtable.h>
@@ -699,6 +700,187 @@ void SMIException(struct pt_regs *regs)
die("System Management Interrupt", regs, SIGABRT);
}
+#ifdef CONFIG_VSX
+static void p9_hmi_special_emu(struct pt_regs *regs)
+{
+ unsigned int ra, rb, t, i, sel, instr, rc;
+ const void __user *addr;
+ u8 vbuf[16], *vdst;
+ unsigned long ea, msr, msr_mask;
+ bool swap;
+
+ if (__get_user_inatomic(instr, (unsigned int __user *)regs->nip))
+ return;
+
+ /*
+ * lxvb16x opcode: 0x7c0006d8
+ * lxvd2x opcode: 0x7c000698
+ * lxvh8x opcode: 0x7c000658
+ * lxvw4x opcode: 0x7c000618
+ */
+ if ((instr & 0xfc00073e) != 0x7c000618) {
+ pr_devel("HMI vec emu: not vector CI %i:%s[%d] nip=%016lx"
+ " instr=%08x\n",
+ smp_processor_id(), current->comm, current->pid,
+ regs->nip, instr);
+ return;
+ }
+
+ /* Grab vector registers into the task struct */
+ msr = regs->msr; /* Grab msr before we flush the bits */
+ flush_vsx_to_thread(current);
+ enable_kernel_altivec();
+
+ /*
+ * Is userspace running with a different endian (this is rare but
+ * not impossible)
+ */
+ swap = (msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
+
+ /* Decode the instruction */
+ ra = (instr >> 16) & 0x1f;
+ rb = (instr >> 11) & 0x1f;
+ t = (instr >> 21) & 0x1f;
+ if (instr & 1)
+ vdst = (u8 *)&current->thread.vr_state.vr[t];
+ else
+ vdst = (u8 *)&current->thread.fp_state.fpr[t][0];
+
+ /* Grab the vector address */
+ ea = regs->gpr[rb] + (ra ? regs->gpr[ra] : 0);
+ if (is_32bit_task())
+ ea &= 0xfffffffful;
+ addr = (__force const void __user *)ea;
+
+ /* Check it */
+ if (!access_ok(VERIFY_READ, addr, 16)) {
+ pr_devel("HMI vec emu: bad access %i:%s[%d] nip=%016lx"
+ " instr=%08x addr=%016lx\n",
+ smp_processor_id(), current->comm, current->pid,
+ regs->nip, instr, (unsigned long)addr);
+ return;
+ }
+
+ /* Read the vector */
+ rc = 0;
+ if ((unsigned long)addr & 0xfUL)
+ /* unaligned case */
+ rc = __copy_from_user_inatomic(vbuf, addr, 16);
+ else
+ __get_user_atomic_128_aligned(vbuf, addr, rc);
+ if (rc) {
+ pr_devel("HMI vec emu: page fault %i:%s[%d] nip=%016lx"
+ " instr=%08x addr=%016lx\n",
+ smp_processor_id(), current->comm, current->pid,
+ regs->nip, instr, (unsigned long)addr);
+ return;
+ }
+
+ pr_devel("HMI vec emu: emulated vector CI %i:%s[%d] nip=%016lx"
+ " instr=%08x addr=%016lx\n",
+ smp_processor_id(), current->comm, current->pid, regs->nip,
+ instr, (unsigned long) addr);
+
+ /* Grab instruction "selector" */
+ sel = (instr >> 6) & 3;
+
+ /*
+ * Check to make sure the facility is actually enabled. This
+ * could happen if we get a false positive hit.
+ *
+ * lxvd2x/lxvw4x always check MSR VSX sel = 0,2
+ * lxvh8x/lxvb16x check MSR VSX or VEC depending on VSR used sel = 1,3
+ */
+ msr_mask = MSR_VSX;
+ if ((sel & 1) && (instr & 1)) /* lxvh8x & lxvb16x + VSR >= 32 */
+ msr_mask = MSR_VEC;
+ if (!(msr & msr_mask)) {
+ pr_devel("HMI vec emu: MSR fac clear %i:%s[%d] nip=%016lx"
+ " instr=%08x msr:%016lx\n",
+ smp_processor_id(), current->comm, current->pid,
+ regs->nip, instr, msr);
+ return;
+ }
+
+ /* Do logging here before we modify sel based on endian */
+ switch (sel) {
+ case 0: /* lxvw4x */
+ PPC_WARN_EMULATED(lxvw4x, regs);
+ break;
+ case 1: /* lxvh8x */
+ PPC_WARN_EMULATED(lxvh8x, regs);
+ break;
+ case 2: /* lxvd2x */
+ PPC_WARN_EMULATED(lxvd2x, regs);
+ break;
+ case 3: /* lxvb16x */
+ PPC_WARN_EMULATED(lxvb16x, regs);
+ break;
+ }
+
+#ifdef __LITTLE_ENDIAN__
+ /*
+ * An LE kernel stores the vector in the task struct as an LE
+ * byte array (effectively swapping both the components and
+ * the content of the components). Those instructions expect
+ * the components to remain in ascending address order, so we
+ * swap them back.
+ *
+ * If we are running a BE user space, the expectation is that
+ * of a simple memcpy, so forcing the emulation to look like
+ * a lxvb16x should do the trick.
+ */
+ if (swap)
+ sel = 3;
+
+ switch (sel) {
+ case 0: /* lxvw4x */
+ for (i = 0; i < 4; i++)
+ ((u32 *)vdst)[i] = ((u32 *)vbuf)[3-i];
+ break;
+ case 1: /* lxvh8x */
+ for (i = 0; i < 8; i++)
+ ((u16 *)vdst)[i] = ((u16 *)vbuf)[7-i];
+ break;
+ case 2: /* lxvd2x */
+ for (i = 0; i < 2; i++)
+ ((u64 *)vdst)[i] = ((u64 *)vbuf)[1-i];
+ break;
+ case 3: /* lxvb16x */
+ for (i = 0; i < 16; i++)
+ vdst[i] = vbuf[15-i];
+ break;
+ }
+#else /* __LITTLE_ENDIAN__ */
+ /* On a big endian kernel, a BE userspace only needs a memcpy */
+ if (!swap)
+ sel = 3;
+
+ /* Otherwise, we need to swap the content of the components */
+ switch (sel) {
+ case 0: /* lxvw4x */
+ for (i = 0; i < 4; i++)
+ ((u32 *)vdst)[i] = cpu_to_le32(((u32 *)vbuf)[i]);
+ break;
+ case 1: /* lxvh8x */
+ for (i = 0; i < 8; i++)
+ ((u16 *)vdst)[i] = cpu_to_le16(((u16 *)vbuf)[i]);
+ break;
+ case 2: /* lxvd2x */
+ for (i = 0; i < 2; i++)
+ ((u64 *)vdst)[i] = cpu_to_le64(((u64 *)vbuf)[i]);
+ break;
+ case 3: /* lxvb16x */
+ memcpy(vdst, vbuf, 16);
+ break;
+ }
+#endif /* !__LITTLE_ENDIAN__ */
+
+ /* Go to next instruction */
+ regs->nip += 4;
+}
+#endif /* CONFIG_VSX */
+
void handle_hmi_exception(struct pt_regs *regs)
{
struct pt_regs *old_regs;
@@ -706,6 +888,21 @@ void handle_hmi_exception(struct pt_regs *regs)
old_regs = set_irq_regs(regs);
irq_enter();
+#ifdef CONFIG_VSX
+ /* Real mode flagged P9 special emu is needed */
+ if (local_paca->hmi_p9_special_emu) {
+ local_paca->hmi_p9_special_emu = 0;
+
+ /*
+ * We don't want to take page faults while doing the
+ * emulation, we just replay the instruction if necessary.
+ */
+ pagefault_disable();
+ p9_hmi_special_emu(regs);
+ pagefault_enable();
+ }
+#endif /* CONFIG_VSX */
+
if (ppc_md.handle_hmi_exception)
ppc_md.handle_hmi_exception(regs);
@@ -1140,13 +1337,8 @@ void program_check_exception(struct pt_regs *regs)
* - A treclaim is attempted when non transactional.
* - A tend is illegally attempted.
* - writing a TM SPR when transactional.
- */
- if (!user_mode(regs) &&
- report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) {
- regs->nip += 4;
- goto bail;
- }
- /* If usermode caused this, it's done something illegal and
+ *
+ * If usermode caused this, it's done something illegal and
* gets a SIGILL slap on the wrist. We call it an illegal
* operand to distinguish from the instruction just being bad
* (e.g. executing a 'tend' on a CPU without TM!); it's an
@@ -1487,7 +1679,7 @@ void fp_unavailable_tm(struct pt_regs *regs)
/* Reclaim didn't save out any FPRs to transact_fprs. */
/* Enable FP for the task: */
- regs->msr |= (MSR_FP | current->thread.fpexc_mode);
+ current->thread.load_fp = 1;
/* This loads and recheckpoints the FP registers from
* thread.fpr[]. They will remain in registers after the
@@ -1495,15 +1687,7 @@ void fp_unavailable_tm(struct pt_regs *regs)
* If VMX is in use, the VRs now hold checkpointed values,
* so we don't want to load the VRs from the thread_struct.
*/
- tm_recheckpoint(&current->thread, MSR_FP);
-
- /* If VMX is in use, get the transactional values back */
- if (regs->msr & MSR_VEC) {
- msr_check_and_set(MSR_VEC);
- load_vr_state(&current->thread.vr_state);
- /* At this point all the VSX state is loaded, so enable it */
- regs->msr |= MSR_VSX;
- }
+ tm_recheckpoint(&current->thread);
}
void altivec_unavailable_tm(struct pt_regs *regs)
@@ -1516,21 +1700,13 @@ void altivec_unavailable_tm(struct pt_regs *regs)
"MSR=%lx\n",
regs->nip, regs->msr);
tm_reclaim_current(TM_CAUSE_FAC_UNAV);
- regs->msr |= MSR_VEC;
- tm_recheckpoint(&current->thread, MSR_VEC);
+ current->thread.load_vec = 1;
+ tm_recheckpoint(&current->thread);
current->thread.used_vr = 1;
-
- if (regs->msr & MSR_FP) {
- msr_check_and_set(MSR_FP);
- load_fp_state(&current->thread.fp_state);
- regs->msr |= MSR_VSX;
- }
}
void vsx_unavailable_tm(struct pt_regs *regs)
{
- unsigned long orig_msr = regs->msr;
-
/* See the comments in fp_unavailable_tm(). This works similarly,
* though we're loading both FP and VEC registers in here.
*
@@ -1544,29 +1720,13 @@ void vsx_unavailable_tm(struct pt_regs *regs)
current->thread.used_vsr = 1;
- /* If FP and VMX are already loaded, we have all the state we need */
- if ((orig_msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC)) {
- regs->msr |= MSR_VSX;
- return;
- }
-
/* This reclaims FP and/or VR regs if they're already enabled */
tm_reclaim_current(TM_CAUSE_FAC_UNAV);
- regs->msr |= MSR_VEC | MSR_FP | current->thread.fpexc_mode |
- MSR_VSX;
-
- /* This loads & recheckpoints FP and VRs; but we have
- * to be sure not to overwrite previously-valid state.
- */
- tm_recheckpoint(&current->thread, regs->msr & ~orig_msr);
-
- msr_check_and_set(orig_msr & (MSR_FP | MSR_VEC));
+ current->thread.load_vec = 1;
+ current->thread.load_fp = 1;
- if (orig_msr & MSR_FP)
- load_fp_state(&current->thread.fp_state);
- if (orig_msr & MSR_VEC)
- load_vr_state(&current->thread.vr_state);
+ tm_recheckpoint(&current->thread);
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
@@ -1924,6 +2084,10 @@ struct ppc_emulated ppc_emulated = {
WARN_EMULATED_SETUP(mfdscr),
WARN_EMULATED_SETUP(mtdscr),
WARN_EMULATED_SETUP(lq_stq),
+ WARN_EMULATED_SETUP(lxvw4x),
+ WARN_EMULATED_SETUP(lxvh8x),
+ WARN_EMULATED_SETUP(lxvd2x),
+ WARN_EMULATED_SETUP(lxvb16x),
#endif
};
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 1d89163d67f2..87da80ccced1 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -98,8 +98,7 @@ static void wd_lockup_ipi(struct pt_regs *regs)
else
dump_stack();
- if (hardlockup_panic)
- nmi_panic(regs, "Hard LOCKUP");
+ /* Do not panic from here because that can recurse into NMI IPI layer */
}
static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
@@ -135,15 +134,18 @@ static void watchdog_smp_panic(int cpu, u64 tb)
pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n",
cpu, cpumask_pr_args(&wd_smp_cpus_pending));
- /*
- * Try to trigger the stuck CPUs.
- */
- for_each_cpu(c, &wd_smp_cpus_pending) {
- if (c == cpu)
- continue;
- smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
+ if (!sysctl_hardlockup_all_cpu_backtrace) {
+ /*
+ * Try to trigger the stuck CPUs, unless we are going to
+ * get a backtrace on all of them anyway.
+ */
+ for_each_cpu(c, &wd_smp_cpus_pending) {
+ if (c == cpu)
+ continue;
+ smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
+ }
+ smp_flush_nmi_ipi(1000000);
}
- smp_flush_nmi_ipi(1000000);
/* Take the stuck CPUs out of the watch group */
set_cpumask_stuck(&wd_smp_cpus_pending, tb);
@@ -275,9 +277,12 @@ void arch_touch_nmi_watchdog(void)
{
unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
int cpu = smp_processor_id();
+ u64 tb = get_tb();
- if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks)
- watchdog_timer_interrupt(cpu);
+ if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
+ per_cpu(wd_timer_tb, cpu) = tb;
+ wd_smp_clear_cpu_pending(cpu, tb);
+ }
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8d43cf205d34..40e5857c4b1c 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -47,6 +47,7 @@
#include <asm/reg.h>
#include <asm/ppc-opcode.h>
+#include <asm/asm-prototypes.h>
#include <asm/disassemble.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
@@ -1089,9 +1090,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->stat.ext_intr_exits++;
r = RESUME_GUEST;
break;
- /* HMI is hypervisor interrupt and host has handled it. Resume guest.*/
+ /* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/
case BOOK3S_INTERRUPT_HMI:
case BOOK3S_INTERRUPT_PERFMON:
+ case BOOK3S_INTERRUPT_SYSTEM_RESET:
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_MACHINE_CHECK:
@@ -2117,15 +2119,6 @@ static int kvmppc_grab_hwthread(int cpu)
struct paca_struct *tpaca;
long timeout = 10000;
- /*
- * ISA v3.0 idle routines do not set hwthread_state or test
- * hwthread_req, so they can not grab idle threads.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- WARN(1, "KVM: can not control sibling threads\n");
- return -EBUSY;
- }
-
tpaca = &paca[cpu];
/* Ensure the thread won't go into the kernel if it wakes */
@@ -2160,12 +2153,10 @@ static void kvmppc_release_hwthread(int cpu)
struct paca_struct *tpaca;
tpaca = &paca[cpu];
+ tpaca->kvm_hstate.hwthread_req = 0;
tpaca->kvm_hstate.kvm_vcpu = NULL;
tpaca->kvm_hstate.kvm_vcore = NULL;
tpaca->kvm_hstate.kvm_split_mode = NULL;
- if (!cpu_has_feature(CPU_FTR_ARCH_300))
- tpaca->kvm_hstate.hwthread_req = 0;
-
}
static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
@@ -2615,6 +2606,9 @@ static void set_irq_happened(int trap)
case BOOK3S_INTERRUPT_HMI:
local_paca->irq_happened |= PACA_IRQ_HMI;
break;
+ case BOOK3S_INTERRUPT_SYSTEM_RESET:
+ replay_system_reset();
+ break;
}
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 42639fba89e8..68bf0f14a962 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -149,11 +149,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
subf r4, r4, r3
mtspr SPRN_DEC, r4
-BEGIN_FTR_SECTION
/* hwthread_req may have got set by cede or no vcpu, so clear it */
li r0, 0
stb r0, HSTATE_HWTHREAD_REQ(r13)
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/*
* For external interrupts we need to call the Linux
@@ -316,7 +314,6 @@ kvm_novcpu_exit:
* Relocation is off and most register values are lost.
* r13 points to the PACA.
* r3 contains the SRR1 wakeup value, SRR1 is trashed.
- * This is not used by ISAv3.0B processors.
*/
.globl kvm_start_guest
kvm_start_guest:
@@ -435,9 +432,6 @@ kvm_secondary_got_guest:
* While waiting we also need to check if we get given a vcpu to run.
*/
kvm_no_guest:
-BEGIN_FTR_SECTION
- twi 31,0,0
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lbz r3, HSTATE_HWTHREAD_REQ(r13)
cmpwi r3, 0
bne 53f
@@ -2546,10 +2540,8 @@ kvm_do_nap:
clrrdi r0, r0, 1
mtspr SPRN_CTRLT, r0
-BEGIN_FTR_SECTION
li r0,1
stb r0,HSTATE_HWTHREAD_REQ(r13)
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mfspr r5,SPRN_LPCR
ori r5,r5,LPCR_PECE0 | LPCR_PECE1
BEGIN_FTR_SECTION
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index ee279c7f4802..1abe6eb51335 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -644,7 +644,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
#endif
case KVM_CAP_PPC_HTM:
- r = cpu_has_feature(CPU_FTR_TM_COMP) && hv_enabled;
+ r = hv_enabled &&
+ (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM_COMP);
break;
default:
r = 0;
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index c66c3626a216..3c29c9009bbf 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -24,7 +24,7 @@ endif
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
copyuser_power7.o string_64.o copypage_power7.o memcpy_power7.o \
- memcpy_64.o memcmp_64.o
+ memcpy_64.o memcmp_64.o pmem.o
obj64-$(CONFIG_SMP) += locks.o
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
new file mode 100644
index 000000000000..53c018762e1c
--- /dev/null
+++ b/arch/powerpc/lib/pmem.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright(c) 2017 IBM Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/string.h>
+#include <linux/export.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+
+/*
+ * CONFIG_ARCH_HAS_PMEM_API symbols
+ */
+void arch_wb_cache_pmem(void *addr, size_t size)
+{
+ unsigned long start = (unsigned long) addr;
+ flush_inval_dcache_range(start, start + size);
+}
+EXPORT_SYMBOL(arch_wb_cache_pmem);
+
+void arch_invalidate_pmem(void *addr, size_t size)
+{
+ unsigned long start = (unsigned long) addr;
+ flush_inval_dcache_range(start, start + size);
+}
+EXPORT_SYMBOL(arch_invalidate_pmem);
+
+/*
+ * CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE symbols
+ */
+long __copy_from_user_flushcache(void *dest, const void __user *src,
+ unsigned size)
+{
+ unsigned long copied, start = (unsigned long) dest;
+
+ copied = __copy_from_user(dest, src, size);
+ flush_inval_dcache_range(start, start + size);
+
+ return copied;
+}
+
+void *memcpy_flushcache(void *dest, const void *src, size_t size)
+{
+ unsigned long start = (unsigned long) dest;
+
+ memcpy(dest, src, size);
+ flush_inval_dcache_range(start, start + size);
+
+ return dest;
+}
+EXPORT_SYMBOL(memcpy_flushcache);
+
+void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
+ size_t len)
+{
+ memcpy_flushcache(to, page_to_virt(page) + offset, len);
+}
+EXPORT_SYMBOL(memcpy_page_flushcache);
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index f208f560aecd..70274b7b4773 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -31,6 +31,8 @@ extern char system_call_common[];
#define XER_SO 0x80000000U
#define XER_OV 0x40000000U
#define XER_CA 0x20000000U
+#define XER_OV32 0x00080000U
+#define XER_CA32 0x00040000U
#ifdef CONFIG_PPC_FPU
/*
@@ -962,6 +964,16 @@ static nokprobe_inline void set_cr0(const struct pt_regs *regs,
op->ccval |= 0x20000000;
}
+static nokprobe_inline void set_ca32(struct instruction_op *op, bool val)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (val)
+ op->xerval |= XER_CA32;
+ else
+ op->xerval &= ~XER_CA32;
+ }
+}
+
static nokprobe_inline void add_with_carry(const struct pt_regs *regs,
struct instruction_op *op, int rd,
unsigned long val1, unsigned long val2,
@@ -985,6 +997,9 @@ static nokprobe_inline void add_with_carry(const struct pt_regs *regs,
op->xerval |= XER_CA;
else
op->xerval &= ~XER_CA;
+
+ set_ca32(op, (unsigned int)val < (unsigned int)val1 ||
+ (carry_in && (unsigned int)val == (unsigned int)val1));
}
static nokprobe_inline void do_cmp_signed(const struct pt_regs *regs,
@@ -1791,6 +1806,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->xerval |= XER_CA;
else
op->xerval &= ~XER_CA;
+ set_ca32(op, op->xerval & XER_CA);
goto logical_done;
case 824: /* srawi */
@@ -1803,6 +1819,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->xerval |= XER_CA;
else
op->xerval &= ~XER_CA;
+ set_ca32(op, op->xerval & XER_CA);
goto logical_done;
#ifdef __powerpc64__
@@ -1832,6 +1849,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->xerval |= XER_CA;
else
op->xerval &= ~XER_CA;
+ set_ca32(op, op->xerval & XER_CA);
goto logical_done;
case 826: /* sradi with sh_5 = 0 */
@@ -1845,6 +1863,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->xerval |= XER_CA;
else
op->xerval &= ~XER_CA;
+ set_ca32(op, op->xerval & XER_CA);
goto logical_done;
#endif /* __powerpc64__ */
@@ -2698,6 +2717,7 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op)
}
regs->nip = next_pc;
}
+NOKPROBE_SYMBOL(emulate_update_regs);
/*
* Emulate a previously-analysed load or store instruction.
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index a0c327d544d1..76a6b057d454 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -15,11 +15,11 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o
hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o
-obj-$(CONFIG_PPC_STD_MMU_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
+obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o
obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o
-ifeq ($(CONFIG_PPC_STD_MMU_64),y)
+ifeq ($(CONFIG_PPC_BOOK3S_64),y)
obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o
obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o
endif
@@ -32,7 +32,7 @@ obj-$(CONFIG_PPC_SPLPAR) += vphn.o
obj-$(CONFIG_PPC_MM_SLICES) += slice.o
obj-y += hugetlbpage.o
ifeq ($(CONFIG_HUGETLB_PAGE),y)
-obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o
+obj-$(CONFIG_PPC_BOOK3S_64) += hugetlbpage-hash64.o
obj-$(CONFIG_PPC_RADIX_MMU) += hugetlbpage-radix.o
obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o
endif
diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c
index 5c4c93dcff19..14cfb11b09d0 100644
--- a/arch/powerpc/mm/dump_hashpagetable.c
+++ b/arch/powerpc/mm/dump_hashpagetable.c
@@ -500,7 +500,7 @@ static void populate_markers(void)
address_markers[6].start_address = PHB_IO_END;
address_markers[7].start_address = IOREMAP_BASE;
address_markers[8].start_address = IOREMAP_END;
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
address_markers[9].start_address = H_VMEMMAP_BASE;
#else
address_markers[9].start_address = VMEMMAP_BASE;
diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c
index c9282d27b203..c2e7dea59490 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -112,7 +112,7 @@ struct flag_info {
static const struct flag_info flag_array[] = {
{
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
.mask = _PAGE_PRIVILEGED,
.val = 0,
#else
@@ -147,7 +147,7 @@ static const struct flag_info flag_array[] = {
.set = "present",
.clear = " ",
}, {
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
.mask = H_PAGE_HASHPTE,
.val = H_PAGE_HASHPTE,
#else
@@ -157,7 +157,7 @@ static const struct flag_info flag_array[] = {
.set = "hpte",
.clear = " ",
}, {
-#ifndef CONFIG_PPC_STD_MMU_64
+#ifndef CONFIG_PPC_BOOK3S_64
.mask = _PAGE_GUARDED,
.val = _PAGE_GUARDED,
.set = "guarded",
@@ -174,7 +174,7 @@ static const struct flag_info flag_array[] = {
.set = "accessed",
.clear = " ",
}, {
-#ifndef CONFIG_PPC_STD_MMU_64
+#ifndef CONFIG_PPC_BOOK3S_64
.mask = _PAGE_WRITETHRU,
.val = _PAGE_WRITETHRU,
.set = "write through",
@@ -450,7 +450,7 @@ static void populate_markers(void)
address_markers[i++].start_address = PHB_IO_END;
address_markers[i++].start_address = IOREMAP_BASE;
address_markers[i++].start_address = IOREMAP_END;
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
address_markers[i++].start_address = H_VMEMMAP_BASE;
#else
address_markers[i++].start_address = VMEMMAP_BASE;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 67ec2e927253..655a5a9a183d 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -21,6 +21,7 @@
#undef DEBUG
#undef DEBUG_LOW
+#define pr_fmt(fmt) "hash-mmu: " fmt
#include <linux/spinlock.h>
#include <linux/errno.h>
#include <linux/sched/mm.h>
diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c
index 558e9d3891bf..2486bee0f93e 100644
--- a/arch/powerpc/mm/hugetlbpage-radix.c
+++ b/arch/powerpc/mm/hugetlbpage-radix.c
@@ -49,17 +49,22 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct hstate *h = hstate_file(file);
+ int fixed = (flags & MAP_FIXED);
+ unsigned long high_limit;
struct vm_unmapped_area_info info;
- if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE))
- mm->context.addr_limit = TASK_SIZE;
+ high_limit = DEFAULT_MAP_WINDOW;
+ if (addr >= high_limit || (fixed && (addr + len > high_limit)))
+ high_limit = TASK_SIZE;
if (len & ~huge_page_mask(h))
return -EINVAL;
- if (len > mm->task_size)
+ if (len > high_limit)
return -ENOMEM;
- if (flags & MAP_FIXED) {
+ if (fixed) {
+ if (addr > high_limit - len)
+ return -ENOMEM;
if (prepare_hugepage_range(file, addr, len))
return -EINVAL;
return addr;
@@ -68,7 +73,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (addr) {
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
- if (mm->task_size - len >= addr &&
+ if (high_limit - len >= addr &&
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
@@ -79,12 +84,9 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = PAGE_SIZE;
- info.high_limit = current->mm->mmap_base;
+ info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
- if (addr > DEFAULT_MAP_WINDOW)
- info.high_limit += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
-
return vm_unmapped_area(&info);
}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 588a521966ec..a07722531b32 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -68,11 +68,11 @@
#include "mmu_decl.h"
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
#if H_PGTABLE_RANGE > USER_VSID_RANGE
#warning Limited user VSID range means pagetable space is wasted
#endif
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
phys_addr_t memstart_addr = ~0;
EXPORT_SYMBOL_GPL(memstart_addr);
@@ -367,11 +367,20 @@ EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-#ifdef CONFIG_PPC_STD_MMU_64
-static bool disable_radix;
+#ifdef CONFIG_PPC_BOOK3S_64
+static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
+
static int __init parse_disable_radix(char *p)
{
- disable_radix = true;
+ bool val;
+
+ if (strlen(p) == 0)
+ val = true;
+ else if (kstrtobool(p, &val))
+ return -EINVAL;
+
+ disable_radix = val;
+
return 0;
}
early_param("disable_radix", parse_disable_radix);
@@ -444,4 +453,4 @@ void __init mmu_early_init_devtree(void)
else
hash__early_init_devtree();
}
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 5d78b193fec4..d503f344e476 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -106,22 +106,27 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
+ int fixed = (flags & MAP_FIXED);
+ unsigned long high_limit;
struct vm_unmapped_area_info info;
- if (unlikely(addr > mm->context.addr_limit &&
- mm->context.addr_limit != TASK_SIZE))
- mm->context.addr_limit = TASK_SIZE;
+ high_limit = DEFAULT_MAP_WINDOW;
+ if (addr >= high_limit || (fixed && (addr + len > high_limit)))
+ high_limit = TASK_SIZE;
- if (len > mm->task_size - mmap_min_addr)
+ if (len > high_limit)
return -ENOMEM;
- if (flags & MAP_FIXED)
+ if (fixed) {
+ if (addr > high_limit - len)
+ return -ENOMEM;
return addr;
+ }
if (addr) {
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
- if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
+ if (high_limit - len >= addr && addr >= mmap_min_addr &&
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
@@ -129,13 +134,9 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
info.flags = 0;
info.length = len;
info.low_limit = mm->mmap_base;
+ info.high_limit = high_limit;
info.align_mask = 0;
- if (unlikely(addr > DEFAULT_MAP_WINDOW))
- info.high_limit = mm->context.addr_limit;
- else
- info.high_limit = DEFAULT_MAP_WINDOW;
-
return vm_unmapped_area(&info);
}
@@ -149,37 +150,37 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
+ int fixed = (flags & MAP_FIXED);
+ unsigned long high_limit;
struct vm_unmapped_area_info info;
- if (unlikely(addr > mm->context.addr_limit &&
- mm->context.addr_limit != TASK_SIZE))
- mm->context.addr_limit = TASK_SIZE;
+ high_limit = DEFAULT_MAP_WINDOW;
+ if (addr >= high_limit || (fixed && (addr + len > high_limit)))
+ high_limit = TASK_SIZE;
- /* requested length too big for entire address space */
- if (len > mm->task_size - mmap_min_addr)
+ if (len > high_limit)
return -ENOMEM;
- if (flags & MAP_FIXED)
+ if (fixed) {
+ if (addr > high_limit - len)
+ return -ENOMEM;
return addr;
+ }
- /* requesting a specific address */
if (addr) {
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
- if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
- (!vma || addr + len <= vm_start_gap(vma)))
+ if (high_limit - len >= addr && addr >= mmap_min_addr &&
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = max(PAGE_SIZE, mmap_min_addr);
- info.high_limit = mm->mmap_base;
+ info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW);
info.align_mask = 0;
- if (addr > DEFAULT_MAP_WINDOW)
- info.high_limit += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
-
addr = vm_unmapped_area(&info);
if (!(addr & ~PAGE_MASK))
return addr;
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 0f613bc63c50..d60a62bf4fc7 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -34,15 +34,6 @@ static inline void switch_mm_pgdir(struct task_struct *tsk,
struct mm_struct *mm) { }
#endif
-#ifdef CONFIG_PPC_BOOK3S_64
-static inline void inc_mm_active_cpus(struct mm_struct *mm)
-{
- atomic_inc(&mm->context.active_cpus);
-}
-#else
-static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
-#endif
-
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index a7e998158f37..59c0766ae4e0 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -93,11 +93,11 @@ static int hash__init_new_context(struct mm_struct *mm)
return index;
/*
- * We do switch_slb() early in fork, even before we setup the
- * mm->context.addr_limit. Default to max task size so that we copy the
- * default values to paca which will help us to handle slb miss early.
+ * In the case of exec, use the default limit,
+ * otherwise inherit it from the mm we are duplicating.
*/
- mm->context.addr_limit = DEFAULT_MAP_WINDOW_USER64;
+ if (!mm->context.slb_addr_limit)
+ mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
/*
* The old code would re-promote on fork, we don't do that when using
@@ -216,19 +216,34 @@ void destroy_context(struct mm_struct *mm)
#ifdef CONFIG_SPAPR_TCE_IOMMU
WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
#endif
+ if (radix_enabled())
+ WARN_ON(process_tb[mm->context.id].prtb0 != 0);
+ else
+ subpage_prot_free(mm);
+ destroy_pagetable_page(mm);
+ __destroy_context(mm->context.id);
+ mm->context.id = MMU_NO_CONTEXT;
+}
+
+void arch_exit_mmap(struct mm_struct *mm)
+{
if (radix_enabled()) {
/*
* Radix doesn't have a valid bit in the process table
* entries. However we know that at least P9 implementation
* will avoid caching an entry with an invalid RTS field,
* and 0 is invalid. So this will do.
+ *
+ * This runs before the "fullmm" tlb flush in exit_mmap,
+ * which does a RIC=2 tlbie to clear the process table
+ * entry. See the "fullmm" comments in tlb-radix.c.
+ *
+ * No barrier required here after the store because
+ * this process will do the invalidate, which starts with
+ * ptesync.
*/
process_tb[mm->context.id].prtb0 = 0;
- } else
- subpage_prot_free(mm);
- destroy_pagetable_page(mm);
- __destroy_context(mm->context.id);
- mm->context.id = MMU_NO_CONTEXT;
+ }
}
#ifdef CONFIG_PPC_RADIX_MMU
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 73016451f330..adb6364f4091 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1148,11 +1148,33 @@ struct topology_update_data {
int new_nid;
};
+#define TOPOLOGY_DEF_TIMER_SECS 60
+
static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
static cpumask_t cpu_associativity_changes_mask;
static int vphn_enabled;
static int prrn_enabled;
static void reset_topology_timer(void);
+static int topology_timer_secs = 1;
+static int topology_inited;
+static int topology_update_needed;
+
+/*
+ * Change polling interval for associativity changes.
+ */
+int timed_topology_update(int nsecs)
+{
+ if (vphn_enabled) {
+ if (nsecs > 0)
+ topology_timer_secs = nsecs;
+ else
+ topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
+
+ reset_topology_timer();
+ }
+
+ return 0;
+}
/*
* Store the current values of the associativity change counters in the
@@ -1246,6 +1268,11 @@ static long vphn_get_associativity(unsigned long cpu,
"hcall_vphn() experienced a hardware fault "
"preventing VPHN. Disabling polling...\n");
stop_topology_update();
+ break;
+ case H_SUCCESS:
+ dbg("VPHN hcall succeeded. Reset polling...\n");
+ timed_topology_update(0);
+ break;
}
return rc;
@@ -1323,8 +1350,11 @@ int numa_update_cpu_topology(bool cpus_locked)
struct device *dev;
int weight, new_nid, i = 0;
- if (!prrn_enabled && !vphn_enabled)
+ if (!prrn_enabled && !vphn_enabled) {
+ if (!topology_inited)
+ topology_update_needed = 1;
return 0;
+ }
weight = cpumask_weight(&cpu_associativity_changes_mask);
if (!weight)
@@ -1363,22 +1393,30 @@ int numa_update_cpu_topology(bool cpus_locked)
cpumask_andnot(&cpu_associativity_changes_mask,
&cpu_associativity_changes_mask,
cpu_sibling_mask(cpu));
+ dbg("Assoc chg gives same node %d for cpu%d\n",
+ new_nid, cpu);
cpu = cpu_last_thread_sibling(cpu);
continue;
}
for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
ud = &updates[i++];
+ ud->next = &updates[i];
ud->cpu = sibling;
ud->new_nid = new_nid;
ud->old_nid = numa_cpu_lookup_table[sibling];
cpumask_set_cpu(sibling, &updated_cpus);
- if (i < weight)
- ud->next = &updates[i];
}
cpu = cpu_last_thread_sibling(cpu);
}
+ /*
+ * Prevent processing of 'updates' from overflowing array
+ * where last entry filled in a 'next' pointer.
+ */
+ if (i)
+ updates[i-1].next = NULL;
+
pr_debug("Topology update for the following CPUs:\n");
if (cpumask_weight(&updated_cpus)) {
for (ud = &updates[0]; ud; ud = ud->next) {
@@ -1433,6 +1471,7 @@ int numa_update_cpu_topology(bool cpus_locked)
out:
kfree(updates);
+ topology_update_needed = 0;
return changed;
}
@@ -1466,7 +1505,7 @@ static struct timer_list topology_timer;
static void reset_topology_timer(void)
{
- mod_timer(&topology_timer, jiffies + 60 * HZ);
+ mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ);
}
#ifdef CONFIG_SMP
@@ -1515,15 +1554,14 @@ int start_topology_update(void)
if (firmware_has_feature(FW_FEATURE_PRRN)) {
if (!prrn_enabled) {
prrn_enabled = 1;
- vphn_enabled = 0;
#ifdef CONFIG_SMP
rc = of_reconfig_notifier_register(&dt_update_nb);
#endif
}
- } else if (firmware_has_feature(FW_FEATURE_VPHN) &&
+ }
+ if (firmware_has_feature(FW_FEATURE_VPHN) &&
lppaca_shared_proc(get_lppaca())) {
if (!vphn_enabled) {
- prrn_enabled = 0;
vphn_enabled = 1;
setup_cpu_associativity_change_counters();
timer_setup(&topology_timer, topology_timer_fn,
@@ -1547,7 +1585,8 @@ int stop_topology_update(void)
#ifdef CONFIG_SMP
rc = of_reconfig_notifier_unregister(&dt_update_nb);
#endif
- } else if (vphn_enabled) {
+ }
+ if (vphn_enabled) {
vphn_enabled = 0;
rc = del_timer_sync(&topology_timer);
}
@@ -1610,9 +1649,17 @@ static int topology_update_init(void)
if (topology_updates_enabled)
start_topology_update();
+ if (vphn_enabled)
+ topology_schedule_update();
+
if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
return -ENOMEM;
+ topology_inited = 1;
+ if (topology_update_needed)
+ bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask),
+ nr_cpumask_bits);
+
return 0;
}
device_initcall(topology_update_init);
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 39c252b54d16..cfbbee941a76 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -169,6 +169,16 @@ void radix__mark_rodata_ro(void)
{
unsigned long start, end;
+ /*
+ * mark_rodata_ro() will mark itself as !writable at some point.
+ * Due to DD1 workaround in radix__pte_update(), we'll end up with
+ * an invalid pte and the system will crash quite severly.
+ */
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+ pr_warn("Warning: Unable to mark rodata read only on P9 DD1\n");
+ return;
+ }
+
start = (unsigned long)_stext;
end = (unsigned long)__init_begin;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 1ec3aee43624..813ea22c3e00 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -57,7 +57,7 @@
#include "mmu_decl.h"
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
#error TASK_SIZE_USER64 exceeds user VSID range
#endif
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 906a86fe457b..2cf5ef3fc50d 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -167,7 +167,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
/*
* user space make sure we are within the allowed limit
*/
- ld r11,PACA_ADDR_LIMIT(r13)
+ ld r11,PACA_SLB_ADDR_LIMIT(r13)
cmpld r3,r11
bge- 8f
@@ -309,10 +309,6 @@ slb_compare_rr_to_size:
srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */
rldimi r10,r9,ESID_BITS_1T,0
ASM_VSID_SCRAMBLE(r10,r9,r11,1T)
- /*
- * bits above VSID_BITS_1T need to be ignored from r10
- * also combine VSID and flags
- */
li r10,MMU_SEGSIZE_1T
rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 45f6740dd407..564fff06f5c1 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -96,7 +96,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
{
struct vm_area_struct *vma;
- if ((mm->task_size - len) < addr)
+ if ((mm->context.slb_addr_limit - len) < addr)
return 0;
vma = find_vma(mm, addr);
return (!vma || (addr + len) <= vm_start_gap(vma));
@@ -133,10 +133,10 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret)
if (!slice_low_has_vma(mm, i))
ret->low_slices |= 1u << i;
- if (mm->task_size <= SLICE_LOW_TOP)
+ if (mm->context.slb_addr_limit <= SLICE_LOW_TOP)
return;
- for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++)
+ for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++)
if (!slice_high_has_vma(mm, i))
__set_bit(i, ret->high_slices);
}
@@ -157,7 +157,7 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma
ret->low_slices |= 1u << i;
hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++) {
+ for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
mask_index = i & 0x1;
index = i >> 1;
if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
@@ -169,7 +169,7 @@ static int slice_check_fit(struct mm_struct *mm,
struct slice_mask mask, struct slice_mask available)
{
DECLARE_BITMAP(result, SLICE_NUM_HIGH);
- unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.addr_limit);
+ unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);
bitmap_and(result, mask.high_slices,
available.high_slices, slice_count);
@@ -219,7 +219,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
mm->context.low_slices_psize = lpsizes;
hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++) {
+ for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
mask_index = i & 0x1;
index = i >> 1;
if (test_bit(i, mask.high_slices))
@@ -329,8 +329,8 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
* Only for that request for which high_limit is above
* DEFAULT_MAP_WINDOW we should apply this.
*/
- if (high_limit > DEFAULT_MAP_WINDOW)
- addr += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
+ if (high_limit > DEFAULT_MAP_WINDOW)
+ addr += mm->context.slb_addr_limit - DEFAULT_MAP_WINDOW;
while (addr > PAGE_SIZE) {
info.high_limit = addr;
@@ -412,25 +412,31 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
struct slice_mask compat_mask;
int fixed = (flags & MAP_FIXED);
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+ unsigned long page_size = 1UL << pshift;
struct mm_struct *mm = current->mm;
unsigned long newaddr;
unsigned long high_limit;
- /*
- * Check if we need to expland slice area.
- */
- if (unlikely(addr > mm->context.addr_limit &&
- mm->context.addr_limit != TASK_SIZE)) {
- mm->context.addr_limit = TASK_SIZE;
+ high_limit = DEFAULT_MAP_WINDOW;
+ if (addr >= high_limit || (fixed && (addr + len > high_limit)))
+ high_limit = TASK_SIZE;
+
+ if (len > high_limit)
+ return -ENOMEM;
+ if (len & (page_size - 1))
+ return -EINVAL;
+ if (fixed) {
+ if (addr & (page_size - 1))
+ return -EINVAL;
+ if (addr > high_limit - len)
+ return -ENOMEM;
+ }
+
+ if (high_limit > mm->context.slb_addr_limit) {
+ mm->context.slb_addr_limit = high_limit;
on_each_cpu(slice_flush_segments, mm, 1);
}
- /*
- * This mmap request can allocate upt to 512TB
- */
- if (addr > DEFAULT_MAP_WINDOW)
- high_limit = mm->context.addr_limit;
- else
- high_limit = DEFAULT_MAP_WINDOW;
+
/*
* init different masks
*/
@@ -446,27 +452,19 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
/* Sanity checks */
BUG_ON(mm->task_size == 0);
+ BUG_ON(mm->context.slb_addr_limit == 0);
VM_BUG_ON(radix_enabled());
slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d\n",
addr, len, flags, topdown);
- if (len > mm->task_size)
- return -ENOMEM;
- if (len & ((1ul << pshift) - 1))
- return -EINVAL;
- if (fixed && (addr & ((1ul << pshift) - 1)))
- return -EINVAL;
- if (fixed && addr > (mm->task_size - len))
- return -ENOMEM;
-
/* If hint, make sure it matches our alignment restrictions */
if (!fixed && addr) {
- addr = _ALIGN_UP(addr, 1ul << pshift);
+ addr = _ALIGN_UP(addr, page_size);
slice_dbg(" aligned addr=%lx\n", addr);
/* Ignore hint if it's too large or overlaps a VMA */
- if (addr > mm->task_size - len ||
+ if (addr > high_limit - len ||
!slice_area_is_free(mm, addr, len))
addr = 0;
}
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index d304028641a2..884f4b705b57 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -39,6 +39,20 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
+static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
+{
+ unsigned long rb,rs,prs,r;
+
+ rb = PPC_BIT(53); /* IS = 1 */
+ rs = pid << PPC_BITLSHIFT(31);
+ prs = 1; /* process scoped */
+ r = 1; /* raidx format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
/*
* We use 128 set in radix mode and 256 set in hpt mode.
*/
@@ -70,22 +84,13 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
- unsigned long rb,rs,prs,r;
-
- rb = PPC_BIT(53); /* IS = 1 */
- rs = pid << PPC_BITLSHIFT(31);
- prs = 1; /* process scoped */
- r = 1; /* raidx format */
-
asm volatile("ptesync": : :"memory");
- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
- : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ __tlbie_pid(pid, ric);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
- trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
-static inline void _tlbiel_va(unsigned long va, unsigned long pid,
- unsigned long ap, unsigned long ric)
+static inline void __tlbiel_va(unsigned long va, unsigned long pid,
+ unsigned long ap, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -95,14 +100,44 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
prs = 1; /* process scoped */
r = 1; /* raidx format */
- asm volatile("ptesync": : :"memory");
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
- asm volatile("ptesync": : :"memory");
trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
-static inline void _tlbie_va(unsigned long va, unsigned long pid,
+static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+}
+
+static inline void _tlbiel_va(unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
+{
+ unsigned long ap = mmu_get_ap(psize);
+
+ asm volatile("ptesync": : :"memory");
+ __tlbiel_va(va, pid, ap, ric);
+ asm volatile("ptesync": : :"memory");
+}
+
+static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ asm volatile("ptesync": : :"memory");
+ if (also_pwc)
+ __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
+ __tlbiel_va_range(start, end, pid, page_size, psize);
+ asm volatile("ptesync": : :"memory");
+}
+
+static inline void __tlbie_va(unsigned long va, unsigned long pid,
unsigned long ap, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -113,13 +148,43 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
prs = 1; /* process scoped */
r = 1; /* raidx format */
- asm volatile("ptesync": : :"memory");
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
- asm volatile("eieio; tlbsync; ptesync": : :"memory");
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
+static inline void __tlbie_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+}
+
+static inline void _tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
+{
+ unsigned long ap = mmu_get_ap(psize);
+
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, pid, ap, ric);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+static inline void _tlbie_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ asm volatile("ptesync": : :"memory");
+ if (also_pwc)
+ __tlbie_pid(pid, RIC_FLUSH_PWC);
+ __tlbie_va_range(start, end, pid, page_size, psize);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
/*
* Base TLB flushing operations:
*
@@ -144,7 +209,7 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm)
EXPORT_SYMBOL(radix__local_flush_tlb_mm);
#ifndef CONFIG_SMP
-static void radix__local_flush_all_mm(struct mm_struct *mm)
+void radix__local_flush_all_mm(struct mm_struct *mm)
{
unsigned long pid;
@@ -154,18 +219,18 @@ static void radix__local_flush_all_mm(struct mm_struct *mm)
_tlbiel_pid(pid, RIC_FLUSH_ALL);
preempt_enable();
}
+EXPORT_SYMBOL(radix__local_flush_all_mm);
#endif /* CONFIG_SMP */
void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
int psize)
{
unsigned long pid;
- unsigned long ap = mmu_get_ap(psize);
preempt_disable();
- pid = mm ? mm->context.id : 0;
+ pid = mm->context.id;
if (pid != MMU_NO_CONTEXT)
- _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
+ _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
preempt_enable();
}
@@ -173,11 +238,10 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd
{
#ifdef CONFIG_HUGETLB_PAGE
/* need the return fix for nohash.c */
- if (vma && is_vm_hugetlb_page(vma))
- return __local_flush_hugetlb_page(vma, vmaddr);
+ if (is_vm_hugetlb_page(vma))
+ return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
- radix__local_flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr,
- mmu_virtual_psize);
+ radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);
@@ -186,36 +250,35 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
{
unsigned long pid;
- preempt_disable();
pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
- goto no_context;
+ return;
+ preempt_disable();
if (!mm_is_thread_local(mm))
_tlbie_pid(pid, RIC_FLUSH_TLB);
else
_tlbiel_pid(pid, RIC_FLUSH_TLB);
-no_context:
preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);
-static void radix__flush_all_mm(struct mm_struct *mm)
+void radix__flush_all_mm(struct mm_struct *mm)
{
unsigned long pid;
- preempt_disable();
pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
- goto no_context;
+ return;
+ preempt_disable();
if (!mm_is_thread_local(mm))
_tlbie_pid(pid, RIC_FLUSH_ALL);
else
_tlbiel_pid(pid, RIC_FLUSH_ALL);
-no_context:
preempt_enable();
}
+EXPORT_SYMBOL(radix__flush_all_mm);
void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
{
@@ -227,28 +290,26 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
int psize)
{
unsigned long pid;
- unsigned long ap = mmu_get_ap(psize);
- preempt_disable();
- pid = mm ? mm->context.id : 0;
+ pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
- goto bail;
+ return;
+
+ preempt_disable();
if (!mm_is_thread_local(mm))
- _tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
+ _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
else
- _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
-bail:
+ _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
preempt_enable();
}
void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
- if (vma && is_vm_hugetlb_page(vma))
- return flush_hugetlb_page(vma, vmaddr);
+ if (is_vm_hugetlb_page(vma))
+ return radix__flush_hugetlb_page(vma, vmaddr);
#endif
- radix__flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr,
- mmu_virtual_psize);
+ radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);
@@ -262,17 +323,86 @@ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
+#define TLB_FLUSH_ALL -1UL
+
/*
- * Currently, for range flushing, we just do a full mm flush. Because
- * we use this in code path where we don' track the page size.
+ * Number of pages above which we invalidate the entire PID rather than
+ * flush individual pages, for local and global flushes respectively.
+ *
+ * tlbie goes out to the interconnect and individual ops are more costly.
+ * It also does not iterate over sets like the local tlbiel variant when
+ * invalidating a full PID, so it has a far lower threshold to change from
+ * individual page flushes to full-pid flushes.
*/
+static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
+
void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
+ unsigned long pid;
+ unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
+ unsigned long page_size = 1UL << page_shift;
+ unsigned long nr_pages = (end - start) >> page_shift;
+ bool local, full;
+
+#ifdef CONFIG_HUGETLB_PAGE
+ if (is_vm_hugetlb_page(vma))
+ return radix__flush_hugetlb_tlb_range(vma, start, end);
+#endif
+
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ return;
- radix__flush_tlb_mm(mm);
+ preempt_disable();
+ if (mm_is_thread_local(mm)) {
+ local = true;
+ full = (end == TLB_FLUSH_ALL ||
+ nr_pages > tlb_local_single_page_flush_ceiling);
+ } else {
+ local = false;
+ full = (end == TLB_FLUSH_ALL ||
+ nr_pages > tlb_single_page_flush_ceiling);
+ }
+
+ if (full) {
+ if (local)
+ _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ else
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ } else {
+ bool hflush = false;
+ unsigned long hstart, hend;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
+ hend = end >> HPAGE_PMD_SHIFT;
+ if (hstart < hend) {
+ hstart <<= HPAGE_PMD_SHIFT;
+ hend <<= HPAGE_PMD_SHIFT;
+ hflush = true;
+ }
+#endif
+
+ asm volatile("ptesync": : :"memory");
+ if (local) {
+ __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
+ if (hflush)
+ __tlbiel_va_range(hstart, hend, pid,
+ HPAGE_PMD_SIZE, MMU_PAGE_2M);
+ asm volatile("ptesync": : :"memory");
+ } else {
+ __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
+ if (hflush)
+ __tlbie_va_range(hstart, hend, pid,
+ HPAGE_PMD_SIZE, MMU_PAGE_2M);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+ }
+ preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_range);
@@ -291,101 +421,118 @@ static int radix_get_mmu_psize(int page_size)
return psize;
}
+static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize);
+
void radix__tlb_flush(struct mmu_gather *tlb)
{
int psize = 0;
struct mm_struct *mm = tlb->mm;
int page_size = tlb->page_size;
- psize = radix_get_mmu_psize(page_size);
/*
* if page size is not something we understand, do a full mm flush
+ *
+ * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
+ * that flushes the process table entry cache upon process teardown.
+ * See the comment for radix in arch_exit_mmap().
*/
- if (psize != -1 && !tlb->fullmm && !tlb->need_flush_all)
- radix__flush_tlb_range_psize(mm, tlb->start, tlb->end, psize);
- else if (tlb->need_flush_all) {
- tlb->need_flush_all = 0;
+ if (tlb->fullmm) {
radix__flush_all_mm(mm);
- } else
- radix__flush_tlb_mm(mm);
-}
+ } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
+ if (!tlb->need_flush_all)
+ radix__flush_tlb_mm(mm);
+ else
+ radix__flush_all_mm(mm);
+ } else {
+ unsigned long start = tlb->start;
+ unsigned long end = tlb->end;
-#define TLB_FLUSH_ALL -1UL
-/*
- * Number of pages above which we will do a bcast tlbie. Just a
- * number at this point copied from x86
- */
-static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+ if (!tlb->need_flush_all)
+ radix__flush_tlb_range_psize(mm, start, end, psize);
+ else
+ radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
+ }
+ tlb->need_flush_all = 0;
+}
-void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
- unsigned long end, int psize)
+static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ int psize, bool also_pwc)
{
unsigned long pid;
- unsigned long addr;
- int local = mm_is_thread_local(mm);
- unsigned long ap = mmu_get_ap(psize);
- unsigned long page_size = 1UL << mmu_psize_defs[psize].shift;
+ unsigned int page_shift = mmu_psize_defs[psize].shift;
+ unsigned long page_size = 1UL << page_shift;
+ unsigned long nr_pages = (end - start) >> page_shift;
+ bool local, full;
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ return;
preempt_disable();
- pid = mm ? mm->context.id : 0;
- if (unlikely(pid == MMU_NO_CONTEXT))
- goto err_out;
+ if (mm_is_thread_local(mm)) {
+ local = true;
+ full = (end == TLB_FLUSH_ALL ||
+ nr_pages > tlb_local_single_page_flush_ceiling);
+ } else {
+ local = false;
+ full = (end == TLB_FLUSH_ALL ||
+ nr_pages > tlb_single_page_flush_ceiling);
+ }
- if (end == TLB_FLUSH_ALL ||
- (end - start) > tlb_single_page_flush_ceiling * page_size) {
+ if (full) {
if (local)
- _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
else
- _tlbie_pid(pid, RIC_FLUSH_TLB);
- goto err_out;
- }
- for (addr = start; addr < end; addr += page_size) {
-
+ _tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL: RIC_FLUSH_TLB);
+ } else {
if (local)
- _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+ _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
else
- _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+ _tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
}
-err_out:
preempt_enable();
}
+void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize)
+{
+ return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
+}
+
+static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize)
+{
+ __radix__flush_tlb_range_psize(mm, start, end, psize, true);
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
- int local = mm_is_thread_local(mm);
- unsigned long ap = mmu_get_ap(mmu_virtual_psize);
unsigned long pid, end;
-
- pid = mm ? mm->context.id : 0;
- preempt_disable();
+ pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
- goto no_context;
+ return;
/* 4k page size, just blow the world */
if (PAGE_SIZE == 0x1000) {
radix__flush_all_mm(mm);
- preempt_enable();
return;
}
- /* Otherwise first do the PWC */
- if (local)
- _tlbiel_pid(pid, RIC_FLUSH_PWC);
- else
- _tlbie_pid(pid, RIC_FLUSH_PWC);
-
- /* Then iterate the pages */
end = addr + HPAGE_PMD_SIZE;
- for (; addr < end; addr += PAGE_SIZE) {
- if (local)
- _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
- else
- _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+
+ /* Otherwise first do the PWC, then iterate the pages. */
+ preempt_disable();
+
+ if (mm_is_thread_local(mm)) {
+ _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+ } else {
+ _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
}
-no_context:
+
preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 62fa7589db2b..8bdef7ed28a8 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -23,7 +23,7 @@
* [ nv gpr save area ] 8*8 |
* [ tail_call_cnt ] 8 |
* [ local_tmp_var ] 8 |
- * fp (r31) --> [ ebpf stack space ] 512 |
+ * fp (r31) --> [ ebpf stack space ] upto 512 |
* [ frame header ] 32/112 |
* sp (r1) ---> [ stack pointer ] --------------
*/
@@ -32,8 +32,8 @@
#define BPF_PPC_STACK_SAVE (8*8)
/* for bpf JIT code internal usage */
#define BPF_PPC_STACK_LOCALS 16
-/* Ensure this is quadword aligned */
-#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + MAX_BPF_STACK + \
+/* stack frame excluding BPF stack, ensure this is quadword aligned */
+#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \
BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE)
#ifndef __ASSEMBLY__
@@ -103,6 +103,7 @@ struct codegen_context {
*/
unsigned int seen;
unsigned int idx;
+ unsigned int stack_size;
};
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index a66e64b0b251..46d74e81aff1 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -69,7 +69,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
static int bpf_jit_stack_local(struct codegen_context *ctx)
{
if (bpf_has_stack_frame(ctx))
- return STACK_FRAME_MIN_SIZE + MAX_BPF_STACK;
+ return STACK_FRAME_MIN_SIZE + ctx->stack_size;
else
return -(BPF_PPC_STACK_SAVE + 16);
}
@@ -82,8 +82,9 @@ static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
{
if (reg >= BPF_PPC_NVR_MIN && reg < 32)
- return (bpf_has_stack_frame(ctx) ? BPF_PPC_STACKFRAME : 0)
- - (8 * (32 - reg));
+ return (bpf_has_stack_frame(ctx) ?
+ (BPF_PPC_STACKFRAME + ctx->stack_size) : 0)
+ - (8 * (32 - reg));
pr_err("BPF JIT is asking about unknown registers");
BUG();
@@ -134,7 +135,7 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
}
- PPC_BPF_STLU(1, 1, -BPF_PPC_STACKFRAME);
+ PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size));
}
/*
@@ -161,7 +162,7 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
/* Setup frame pointer to point to the bpf stack area */
if (bpf_is_seen_register(ctx, BPF_REG_FP))
PPC_ADDI(b2p[BPF_REG_FP], 1,
- STACK_FRAME_MIN_SIZE + MAX_BPF_STACK);
+ STACK_FRAME_MIN_SIZE + ctx->stack_size);
}
static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
@@ -183,7 +184,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
/* Tear down our stack frame */
if (bpf_has_stack_frame(ctx)) {
- PPC_ADDI(1, 1, BPF_PPC_STACKFRAME);
+ PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
if (ctx->seen & SEEN_FUNC) {
PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
PPC_MTLR(0);
@@ -1013,6 +1014,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
memset(&cgctx, 0, sizeof(struct codegen_context));
+ /* Make sure that the stack is quadword aligned. */
+ cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
+
/* Scouting faux-generate pass 0 */
if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
/* We hit something illegal or unsupported. */
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index c82497a31c54..264b6ab11978 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -555,9 +555,7 @@ static void cell_virtual_cntr(unsigned long data)
static void start_virt_cntrs(void)
{
- init_timer(&timer_virt_cntr);
- timer_virt_cntr.function = cell_virtual_cntr;
- timer_virt_cntr.data = 0UL;
+ setup_timer(&timer_virt_cntr, cell_virtual_cntr, 0UL);
timer_virt_cntr.expires = jiffies + HZ / 10;
add_timer(&timer_virt_cntr);
}
@@ -679,9 +677,7 @@ static void spu_evnt_swap(unsigned long data)
static void start_spu_event_swap(void)
{
- init_timer(&timer_spu_event_swap);
- timer_spu_event_swap.function = spu_evnt_swap;
- timer_spu_event_swap.data = 0UL;
+ setup_timer(&timer_spu_event_swap, spu_evnt_swap, 0UL);
timer_spu_event_swap.expires = jiffies + HZ / 25;
add_timer(&timer_spu_event_swap);
}
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 9c88b82f6229..72238eedc360 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -540,7 +540,7 @@ static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
{
if (s1 < s2)
return 1;
- if (s2 > s1)
+ if (s1 > s2)
return -1;
return memcmp(d1, d2, s1);
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index a78f255111f2..ae07470fde3c 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -295,10 +295,6 @@ config PPC_STD_MMU_32
def_bool y
depends on PPC_STD_MMU && PPC32
-config PPC_STD_MMU_64
- def_bool y
- depends on PPC_STD_MMU && PPC64
-
config PPC_RADIX_MMU
bool "Radix MMU Support"
depends on PPC_BOOK3S_64
@@ -309,6 +305,19 @@ config PPC_RADIX_MMU
is only implemented by IBM Power9 CPUs, if you don't have one of them
you can probably disable this.
+config PPC_RADIX_MMU_DEFAULT
+ bool "Default to using the Radix MMU when possible"
+ depends on PPC_RADIX_MMU
+ default y
+ help
+ When the hardware supports the Radix MMU, default to using it unless
+ "disable_radix[=yes]" is specified on the kernel command line.
+
+ If this option is disabled, the Hash MMU will be used by default,
+ unless "disable_radix=no" is specified on the kernel command line.
+
+ If you're unsure, say Y.
+
config ARCH_ENABLE_HUGEPAGE_MIGRATION
def_bool y
depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION
@@ -324,7 +333,7 @@ config PPC_BOOK3E_MMU
config PPC_MM_SLICES
bool
- default y if PPC_STD_MMU_64
+ default y if PPC_BOOK3S_64
default n
config PPC_HAVE_PMU_SUPPORT
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index 70183eb3d5c8..39a1d4225e0f 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -513,9 +513,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
mutex_init(&host->mutex);
init_completion(&host->complete);
spin_lock_init(&host->lock);
- init_timer(&host->timeout_timer);
- host->timeout_timer.function = kw_i2c_timeout;
- host->timeout_timer.data = (unsigned long)host;
+ setup_timer(&host->timeout_timer, kw_i2c_timeout, (unsigned long)host);
psteps = of_get_property(np, "AAPL,address-step", NULL);
steps = psteps ? (*psteps) : 0x10;
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 7a31c26500e6..3732118a0482 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -15,4 +15,5 @@ obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o
obj-$(CONFIG_OPAL_PRD) += opal-prd.o
obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
-obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o
+obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o
+obj-$(CONFIG_PPC_FTW) += nx-ftw.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 8864065eba22..4650fb294e7a 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -41,7 +41,6 @@
#include "powernv.h"
#include "pci.h"
-static bool pnv_eeh_nb_init = false;
static int eeh_event_irq = -EINVAL;
static int pnv_eeh_init(void)
@@ -197,31 +196,31 @@ PNV_EEH_DBGFS_ENTRY(inbB, 0xE10);
* been built. If the I/O cache staff has been built, EEH is
* ready to supply service.
*/
-static int pnv_eeh_post_init(void)
+int pnv_eeh_post_init(void)
{
struct pci_controller *hose;
struct pnv_phb *phb;
int ret = 0;
- /* Register OPAL event notifier */
- if (!pnv_eeh_nb_init) {
- eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
- if (eeh_event_irq < 0) {
- pr_err("%s: Can't register OPAL event interrupt (%d)\n",
- __func__, eeh_event_irq);
- return eeh_event_irq;
- }
+ /* Probe devices & build address cache */
+ eeh_probe_devices();
+ eeh_addr_cache_build();
- ret = request_irq(eeh_event_irq, pnv_eeh_event,
- IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL);
- if (ret < 0) {
- irq_dispose_mapping(eeh_event_irq);
- pr_err("%s: Can't request OPAL event interrupt (%d)\n",
- __func__, eeh_event_irq);
- return ret;
- }
+ /* Register OPAL event notifier */
+ eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
+ if (eeh_event_irq < 0) {
+ pr_err("%s: Can't register OPAL event interrupt (%d)\n",
+ __func__, eeh_event_irq);
+ return eeh_event_irq;
+ }
- pnv_eeh_nb_init = true;
+ ret = request_irq(eeh_event_irq, pnv_eeh_event,
+ IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL);
+ if (ret < 0) {
+ irq_dispose_mapping(eeh_event_irq);
+ pr_err("%s: Can't request OPAL event interrupt (%d)\n",
+ __func__, eeh_event_irq);
+ return ret;
}
if (!eeh_enabled())
@@ -367,6 +366,10 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
return NULL;
+ /* Skip if we haven't probed yet */
+ if (phb->ioda.pe_rmap[config_addr] == IODA_INVALID_PE)
+ return NULL;
+
/* Initialize eeh device */
edev->class_code = pdn->class_code;
edev->mode &= 0xFFFFFF00;
@@ -1731,7 +1734,6 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
static struct eeh_ops pnv_eeh_ops = {
.name = "powernv",
.init = pnv_eeh_init,
- .post_init = pnv_eeh_post_init,
.probe = pnv_eeh_probe,
.set_option = pnv_eeh_set_option,
.get_pe_addr = pnv_eeh_get_pe_addr,
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 2cb6cbea4b3b..f6cbc1a71472 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -395,6 +395,7 @@ struct npu_context {
struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS];
struct mmu_notifier mn;
struct kref kref;
+ bool nmmu_flush;
/* Callback to stop translation requests on a given GPU */
struct npu_context *(*release_cb)(struct npu_context *, void *);
@@ -545,11 +546,13 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
unsigned long pid = npu_context->mm->context.id;
- /*
- * Unfortunately the nest mmu does not support flushing specific
- * addresses so we have to flush the whole mm.
- */
- flush_tlb_mm(npu_context->mm);
+ if (npu_context->nmmu_flush)
+ /*
+ * Unfortunately the nest mmu does not support flushing specific
+ * addresses so we have to flush the whole mm once before
+ * shooting down the GPU translation.
+ */
+ flush_all_mm(npu_context->mm);
/*
* Loop over all the NPUs this process is active on and launch
@@ -722,6 +725,16 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
return ERR_PTR(-ENODEV);
npu_context->npdev[npu->index][nvlink_index] = npdev;
+ if (!nphb->npu.nmmu_flush) {
+ /*
+ * If we're not explicitly flushing ourselves we need to mark
+ * the thread for global flushes
+ */
+ npu_context->nmmu_flush = false;
+ mm_context_add_copro(mm);
+ } else
+ npu_context->nmmu_flush = true;
+
return npu_context;
}
EXPORT_SYMBOL(pnv_npu2_init_context);
@@ -731,6 +744,9 @@ static void pnv_npu2_release_context(struct kref *kref)
struct npu_context *npu_context =
container_of(kref, struct npu_context, kref);
+ if (!npu_context->nmmu_flush)
+ mm_context_remove_copro(npu_context->mm);
+
npu_context->mm->context.npu_context = NULL;
mmu_notifier_unregister(&npu_context->mn,
npu_context->mm);
@@ -819,6 +835,8 @@ int pnv_npu2_init(struct pnv_phb *phb)
static int npu_index;
uint64_t rc = 0;
+ phb->npu.nmmu_flush =
+ of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush");
for_each_child_of_node(phb->hose->dn, dn) {
gpdev = pnv_pci_get_gpu_dev(get_pci_dev(dn));
if (gpdev) {
diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c
index cf33769a7b72..18a355fa15e8 100644
--- a/arch/powerpc/platforms/powernv/opal-async.c
+++ b/arch/powerpc/platforms/powernv/opal-async.c
@@ -1,7 +1,7 @@
/*
* PowerNV OPAL asynchronous completion interfaces
*
- * Copyright 2013 IBM Corp.
+ * Copyright 2013-2017 IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -23,40 +23,50 @@
#include <asm/machdep.h>
#include <asm/opal.h>
-#define N_ASYNC_COMPLETIONS 64
+enum opal_async_token_state {
+ ASYNC_TOKEN_UNALLOCATED = 0,
+ ASYNC_TOKEN_ALLOCATED,
+ ASYNC_TOKEN_DISPATCHED,
+ ASYNC_TOKEN_ABANDONED,
+ ASYNC_TOKEN_COMPLETED
+};
+
+struct opal_async_token {
+ enum opal_async_token_state state;
+ struct opal_msg response;
+};
-static DECLARE_BITMAP(opal_async_complete_map, N_ASYNC_COMPLETIONS) = {~0UL};
-static DECLARE_BITMAP(opal_async_token_map, N_ASYNC_COMPLETIONS);
static DECLARE_WAIT_QUEUE_HEAD(opal_async_wait);
static DEFINE_SPINLOCK(opal_async_comp_lock);
static struct semaphore opal_async_sem;
-static struct opal_msg *opal_async_responses;
static unsigned int opal_max_async_tokens;
+static struct opal_async_token *opal_async_tokens;
-int __opal_async_get_token(void)
+static int __opal_async_get_token(void)
{
unsigned long flags;
- int token;
+ int i, token = -EBUSY;
spin_lock_irqsave(&opal_async_comp_lock, flags);
- token = find_first_bit(opal_async_complete_map, opal_max_async_tokens);
- if (token >= opal_max_async_tokens) {
- token = -EBUSY;
- goto out;
- }
- if (__test_and_set_bit(token, opal_async_token_map)) {
- token = -EBUSY;
- goto out;
+ for (i = 0; i < opal_max_async_tokens; i++) {
+ if (opal_async_tokens[i].state == ASYNC_TOKEN_UNALLOCATED) {
+ opal_async_tokens[i].state = ASYNC_TOKEN_ALLOCATED;
+ token = i;
+ break;
+ }
}
- __clear_bit(token, opal_async_complete_map);
-
-out:
spin_unlock_irqrestore(&opal_async_comp_lock, flags);
return token;
}
+/*
+ * Note: If the returned token is used in an opal call and opal returns
+ * OPAL_ASYNC_COMPLETION you MUST call one of opal_async_wait_response() or
+ * opal_async_wait_response_interruptible() at least once before calling another
+ * opal_async_* function
+ */
int opal_async_get_token_interruptible(void)
{
int token;
@@ -73,9 +83,10 @@ int opal_async_get_token_interruptible(void)
}
EXPORT_SYMBOL_GPL(opal_async_get_token_interruptible);
-int __opal_async_release_token(int token)
+static int __opal_async_release_token(int token)
{
unsigned long flags;
+ int rc;
if (token < 0 || token >= opal_max_async_tokens) {
pr_err("%s: Passed token is out of range, token %d\n",
@@ -84,11 +95,26 @@ int __opal_async_release_token(int token)
}
spin_lock_irqsave(&opal_async_comp_lock, flags);
- __set_bit(token, opal_async_complete_map);
- __clear_bit(token, opal_async_token_map);
+ switch (opal_async_tokens[token].state) {
+ case ASYNC_TOKEN_COMPLETED:
+ case ASYNC_TOKEN_ALLOCATED:
+ opal_async_tokens[token].state = ASYNC_TOKEN_UNALLOCATED;
+ rc = 0;
+ break;
+ /*
+ * DISPATCHED and ABANDONED tokens must wait for OPAL to respond.
+ * Mark a DISPATCHED token as ABANDONED so that the response handling
+ * code knows no one cares and that it can free it then.
+ */
+ case ASYNC_TOKEN_DISPATCHED:
+ opal_async_tokens[token].state = ASYNC_TOKEN_ABANDONED;
+ /* Fall through */
+ default:
+ rc = 1;
+ }
spin_unlock_irqrestore(&opal_async_comp_lock, flags);
- return 0;
+ return rc;
}
int opal_async_release_token(int token)
@@ -96,12 +122,10 @@ int opal_async_release_token(int token)
int ret;
ret = __opal_async_release_token(token);
- if (ret)
- return ret;
-
- up(&opal_async_sem);
+ if (!ret)
+ up(&opal_async_sem);
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(opal_async_release_token);
@@ -117,22 +141,83 @@ int opal_async_wait_response(uint64_t token, struct opal_msg *msg)
return -EINVAL;
}
- /* Wakeup the poller before we wait for events to speed things
+ /*
+ * There is no need to mark the token as dispatched, wait_event()
+ * will block until the token completes.
+ *
+ * Wakeup the poller before we wait for events to speed things
* up on platforms or simulators where the interrupts aren't
* functional.
*/
opal_wake_poller();
- wait_event(opal_async_wait, test_bit(token, opal_async_complete_map));
- memcpy(msg, &opal_async_responses[token], sizeof(*msg));
+ wait_event(opal_async_wait, opal_async_tokens[token].state
+ == ASYNC_TOKEN_COMPLETED);
+ memcpy(msg, &opal_async_tokens[token].response, sizeof(*msg));
return 0;
}
EXPORT_SYMBOL_GPL(opal_async_wait_response);
+int opal_async_wait_response_interruptible(uint64_t token, struct opal_msg *msg)
+{
+ unsigned long flags;
+ int ret;
+
+ if (token >= opal_max_async_tokens) {
+ pr_err("%s: Invalid token passed\n", __func__);
+ return -EINVAL;
+ }
+
+ if (!msg) {
+ pr_err("%s: Invalid message pointer passed\n", __func__);
+ return -EINVAL;
+ }
+
+ /*
+ * The first time this gets called we mark the token as DISPATCHED
+ * so that if wait_event_interruptible() returns not zero and the
+ * caller frees the token, we know not to actually free the token
+ * until the response comes.
+ *
+ * Only change if the token is ALLOCATED - it may have been
+ * completed even before the caller gets around to calling this
+ * the first time.
+ *
+ * There is also a dirty great comment at the token allocation
+ * function that if the opal call returns OPAL_ASYNC_COMPLETION to
+ * the caller then the caller *must* call this or the not
+ * interruptible version before doing anything else with the
+ * token.
+ */
+ if (opal_async_tokens[token].state == ASYNC_TOKEN_ALLOCATED) {
+ spin_lock_irqsave(&opal_async_comp_lock, flags);
+ if (opal_async_tokens[token].state == ASYNC_TOKEN_ALLOCATED)
+ opal_async_tokens[token].state = ASYNC_TOKEN_DISPATCHED;
+ spin_unlock_irqrestore(&opal_async_comp_lock, flags);
+ }
+
+ /*
+ * Wakeup the poller before we wait for events to speed things
+ * up on platforms or simulators where the interrupts aren't
+ * functional.
+ */
+ opal_wake_poller();
+ ret = wait_event_interruptible(opal_async_wait,
+ opal_async_tokens[token].state ==
+ ASYNC_TOKEN_COMPLETED);
+ if (!ret)
+ memcpy(msg, &opal_async_tokens[token].response, sizeof(*msg));
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(opal_async_wait_response_interruptible);
+
+/* Called from interrupt context */
static int opal_async_comp_event(struct notifier_block *nb,
unsigned long msg_type, void *msg)
{
struct opal_msg *comp_msg = msg;
+ enum opal_async_token_state state;
unsigned long flags;
uint64_t token;
@@ -140,11 +225,17 @@ static int opal_async_comp_event(struct notifier_block *nb,
return 0;
token = be64_to_cpu(comp_msg->params[0]);
- memcpy(&opal_async_responses[token], comp_msg, sizeof(*comp_msg));
spin_lock_irqsave(&opal_async_comp_lock, flags);
- __set_bit(token, opal_async_complete_map);
+ state = opal_async_tokens[token].state;
+ opal_async_tokens[token].state = ASYNC_TOKEN_COMPLETED;
spin_unlock_irqrestore(&opal_async_comp_lock, flags);
+ if (state == ASYNC_TOKEN_ABANDONED) {
+ /* Free the token, no one else will */
+ opal_async_release_token(token);
+ return 0;
+ }
+ memcpy(&opal_async_tokens[token].response, comp_msg, sizeof(*comp_msg));
wake_up(&opal_async_wait);
return 0;
@@ -178,32 +269,23 @@ int __init opal_async_comp_init(void)
}
opal_max_async_tokens = be32_to_cpup(async);
- if (opal_max_async_tokens > N_ASYNC_COMPLETIONS)
- opal_max_async_tokens = N_ASYNC_COMPLETIONS;
+ opal_async_tokens = kcalloc(opal_max_async_tokens,
+ sizeof(*opal_async_tokens), GFP_KERNEL);
+ if (!opal_async_tokens) {
+ err = -ENOMEM;
+ goto out_opal_node;
+ }
err = opal_message_notifier_register(OPAL_MSG_ASYNC_COMP,
&opal_async_comp_nb);
if (err) {
pr_err("%s: Can't register OPAL event notifier (%d)\n",
__func__, err);
+ kfree(opal_async_tokens);
goto out_opal_node;
}
- opal_async_responses = kzalloc(
- sizeof(*opal_async_responses) * opal_max_async_tokens,
- GFP_KERNEL);
- if (!opal_async_responses) {
- pr_err("%s: Out of memory, failed to do asynchronous "
- "completion init\n", __func__);
- err = -ENOMEM;
- goto out_opal_node;
- }
-
- /* Initialize to 1 less than the maximum tokens available, as we may
- * require to pop one during emergency through synchronous call to
- * __opal_async_get_token()
- */
- sema_init(&opal_async_sem, opal_max_async_tokens - 1);
+ sema_init(&opal_async_sem, opal_max_async_tokens);
out_opal_node:
of_node_put(opal_node);
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index d78fed728cdf..c9e1a4ff295c 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -1,5 +1,5 @@
/*
- * OPAL hypervisor Maintenance interrupt handling support in PowreNV.
+ * OPAL hypervisor Maintenance interrupt handling support in PowerNV.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index ecdcba9d1220..9d1b8c0aaf93 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -174,8 +174,14 @@ void opal_event_shutdown(void)
/* First free interrupts, which will also mask them */
for (i = 0; i < opal_irq_count; i++) {
- if (opal_irqs[i])
+ if (!opal_irqs[i])
+ continue;
+
+ if (in_interrupt())
+ disable_irq_nosync(opal_irqs[i]);
+ else
free_irq(opal_irqs[i], NULL);
+
opal_irqs[i] = 0;
}
}
diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c
index 4495f428b500..d9916ea62305 100644
--- a/arch/powerpc/platforms/powernv/opal-memory-errors.c
+++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c
@@ -1,5 +1,5 @@
/*
- * OPAL asynchronus Memory error handling support in PowreNV.
+ * OPAL asynchronus Memory error handling support in PowerNV.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c
index aa267f120033..0a7074bb91dc 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor.c
@@ -19,13 +19,10 @@
*/
#include <linux/delay.h>
-#include <linux/mutex.h>
#include <linux/of_platform.h>
#include <asm/opal.h>
#include <asm/machdep.h>
-static DEFINE_MUTEX(opal_sensor_mutex);
-
/*
* This will return sensor information to driver based on the requested sensor
* handle. A handle is an opaque id for the powernv, read by the driver from the
@@ -38,13 +35,9 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
__be32 data;
token = opal_async_get_token_interruptible();
- if (token < 0) {
- pr_err("%s: Couldn't get the token, returning\n", __func__);
- ret = token;
- goto out;
- }
+ if (token < 0)
+ return token;
- mutex_lock(&opal_sensor_mutex);
ret = opal_sensor_read(sensor_hndl, token, &data);
switch (ret) {
case OPAL_ASYNC_COMPLETION:
@@ -52,7 +45,7 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
if (ret) {
pr_err("%s: Failed to wait for the async response, %d\n",
__func__, ret);
- goto out_token;
+ goto out;
}
ret = opal_error_code(opal_get_async_rc(msg));
@@ -73,10 +66,8 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
break;
}
-out_token:
- mutex_unlock(&opal_sensor_mutex);
- opal_async_release_token(token);
out:
+ opal_async_release_token(token);
return ret;
}
EXPORT_SYMBOL_GPL(opal_get_sensor_data);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 8c1ede2d3f7e..6f4b00a2ac46 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -94,7 +94,7 @@ opal_return:
* bytes (always BE) since MSR:LE will end up fixed up as a side
* effect of the rfid.
*/
- FIXUP_ENDIAN
+ FIXUP_ENDIAN_HV
ld r2,PACATOC(r13);
lwz r4,8(r1);
ld r5,PPC_LR_STKOFF(r1);
@@ -120,7 +120,7 @@ opal_real_call:
hrfid
opal_return_realmode:
- FIXUP_ENDIAN
+ FIXUP_ENDIAN_HV
ld r2,PACATOC(r13);
lwz r11,8(r1);
ld r12,PPC_LR_STKOFF(r1)
@@ -307,6 +307,7 @@ OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC);
OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
+OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET);
OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 65c79ecf5a4d..041ddbd1fc57 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -998,6 +998,7 @@ int opal_error_code(int rc)
case OPAL_PARAMETER: return -EINVAL;
case OPAL_ASYNC_COMPLETION: return -EINPROGRESS;
+ case OPAL_BUSY:
case OPAL_BUSY_EVENT: return -EBUSY;
case OPAL_NO_MEM: return -ENOMEM;
case OPAL_PERMISSION: return -EPERM;
@@ -1037,3 +1038,4 @@ EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
/* Export this for KVM */
EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
EXPORT_SYMBOL_GPL(opal_int_eoi);
+EXPORT_SYMBOL_GPL(opal_error_code);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 57f9e55f4352..749055553064 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1002,9 +1002,12 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
}
/*
- * After doing so, there would be a "hole" in the /proc/iomem when
- * offset is a positive value. It looks like the device return some
- * mmio back to the system, which actually no one could use it.
+ * Since M64 BAR shares segments among all possible 256 PEs,
+ * we have to shift the beginning of PF IOV BAR to make it start from
+ * the segment which belongs to the PE number assigned to the first VF.
+ * This creates a "hole" in the /proc/iomem which could be used for
+ * allocating other resources so we reserve this area below and
+ * release when IOV is released.
*/
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
res = &dev->resource[i + PCI_IOV_RESOURCES];
@@ -1018,7 +1021,22 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n",
i, &res2, res, (offset > 0) ? "En" : "Dis",
num_vfs, offset);
+
+ if (offset < 0) {
+ devm_release_resource(&dev->dev, &pdn->holes[i]);
+ memset(&pdn->holes[i], 0, sizeof(pdn->holes[i]));
+ }
+
pci_update_resource(dev, i + PCI_IOV_RESOURCES);
+
+ if (offset > 0) {
+ pdn->holes[i].start = res2.start;
+ pdn->holes[i].end = res2.start + size * offset - 1;
+ pdn->holes[i].flags = IORESOURCE_BUS;
+ pdn->holes[i].name = "pnv_iov_reserved";
+ devm_request_resource(&dev->dev, res->parent,
+ &pdn->holes[i]);
+ }
}
return 0;
}
@@ -2779,7 +2797,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
return -EINVAL;
- if ((window_size > memory_hotplug_max()) || !is_power_of_2(window_size))
+ if (!is_power_of_2(window_size))
return -EINVAL;
/* Adjust direct table size from window_size and levels */
@@ -3293,8 +3311,7 @@ static void pnv_pci_ioda_fixup(void)
pnv_pci_ioda_create_dbgfs();
#ifdef CONFIG_EEH
- eeh_init();
- eeh_addr_cache_build();
+ pnv_eeh_post_init();
#endif
}
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index b47f9406d97e..b772d7473896 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -188,6 +188,9 @@ struct pnv_phb {
/* Bitmask for MMIO register usage */
unsigned long mmio_atsd_usage;
+
+ /* Do we need to explicitly flush the nest mmu? */
+ bool nmmu_flush;
} npu;
#ifdef CONFIG_CXL_BASE
@@ -235,6 +238,7 @@ extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
extern bool pnv_pci_enable_device_hook(struct pci_dev *dev);
extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
+extern int pnv_eeh_post_init(void);
extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index bbb73aa0eb8f..1edfbc1e40f4 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -36,6 +36,7 @@
#include <asm/opal.h>
#include <asm/kexec.h>
#include <asm/smp.h>
+#include <asm/tm.h>
#include "powernv.h"
@@ -290,6 +291,7 @@ static void __init pnv_setup_machdep_opal(void)
ppc_md.restart = pnv_restart;
pm_power_off = pnv_power_off;
ppc_md.halt = pnv_halt;
+ /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */
ppc_md.machine_check_exception = opal_machine_check;
ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
ppc_md.hmi_exception_early = opal_hmi_exception_early;
@@ -311,6 +313,28 @@ static int __init pnv_probe(void)
return 1;
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void __init pnv_tm_init(void)
+{
+ if (!firmware_has_feature(FW_FEATURE_OPAL) ||
+ !pvr_version_is(PVR_POWER9) ||
+ early_cpu_has_feature(CPU_FTR_TM))
+ return;
+
+ if (opal_reinit_cpus(OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) != OPAL_SUCCESS)
+ return;
+
+ pr_info("Enabling TM (Transactional Memory) with Suspend Disabled\n");
+ cur_cpu_spec->cpu_features |= CPU_FTR_TM;
+ /* Make sure "normal" HTM is off (it should be) */
+ cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_HTM;
+ /* Turn on no suspend mode, and HTM no SC */
+ cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NO_SUSPEND | \
+ PPC_FEATURE2_HTM_NOSC;
+ tm_suspend_disabled = true;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
/*
* Returns the cpu frequency for 'cpu' in Hz. This is used by
* /proc/cpuinfo
@@ -319,7 +343,7 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu)
{
unsigned long ret_freq;
- ret_freq = cpufreq_quick_get(cpu) * 1000ul;
+ ret_freq = cpufreq_get(cpu) * 1000ul;
/*
* If the backend cpufreq driver does not exist,
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index c17f81e433f7..ba030669eca1 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -49,6 +49,13 @@
static void pnv_smp_setup_cpu(int cpu)
{
+ /*
+ * P9 workaround for CI vector load (see traps.c),
+ * enable the corresponding HMI interrupt
+ */
+ if (pvr_version_is(PVR_POWER9))
+ mtspr(SPRN_HMEER, mfspr(SPRN_HMEER) | PPC_BIT(17));
+
if (xive_enabled())
xive_smp_setup_cpu();
else if (cpu != boot_cpuid)
@@ -290,6 +297,54 @@ static void __init pnv_smp_probe(void)
}
}
+static int pnv_system_reset_exception(struct pt_regs *regs)
+{
+ if (smp_handle_nmi_ipi(regs))
+ return 1;
+ return 0;
+}
+
+static int pnv_cause_nmi_ipi(int cpu)
+{
+ int64_t rc;
+
+ if (cpu >= 0) {
+ rc = opal_signal_system_reset(get_hard_smp_processor_id(cpu));
+ if (rc != OPAL_SUCCESS)
+ return 0;
+ return 1;
+
+ } else if (cpu == NMI_IPI_ALL_OTHERS) {
+ bool success = true;
+ int c;
+
+
+ /*
+ * We do not use broadcasts (yet), because it's not clear
+ * exactly what semantics Linux wants or the firmware should
+ * provide.
+ */
+ for_each_online_cpu(c) {
+ if (c == smp_processor_id())
+ continue;
+
+ rc = opal_signal_system_reset(
+ get_hard_smp_processor_id(c));
+ if (rc != OPAL_SUCCESS)
+ success = false;
+ }
+ if (success)
+ return 1;
+
+ /*
+ * Caller will fall back to doorbells, which may pick
+ * up the remainders.
+ */
+ }
+
+ return 0;
+}
+
static struct smp_ops_t pnv_smp_ops = {
.message_pass = NULL, /* Use smp_muxed_ipi_message_pass */
.cause_ipi = NULL, /* Filled at runtime by pnv_smp_probe() */
@@ -308,6 +363,10 @@ static struct smp_ops_t pnv_smp_ops = {
/* This is called very early during platform setup_arch */
void __init pnv_smp_init(void)
{
+ if (opal_check_token(OPAL_SIGNAL_SYSTEM_RESET)) {
+ ppc_md.system_reset_exception = pnv_system_reset_exception;
+ pnv_smp_ops.cause_nmi_ipi = pnv_cause_nmi_ipi;
+ }
smp_ops = &pnv_smp_ops;
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c
new file mode 100644
index 000000000000..ca22f1eae050
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-debug.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2016-17 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include "vas.h"
+
+static struct dentry *vas_debugfs;
+
+static char *cop_to_str(int cop)
+{
+ switch (cop) {
+ case VAS_COP_TYPE_FAULT: return "Fault";
+ case VAS_COP_TYPE_842: return "NX-842 Normal Priority";
+ case VAS_COP_TYPE_842_HIPRI: return "NX-842 High Priority";
+ case VAS_COP_TYPE_GZIP: return "NX-GZIP Normal Priority";
+ case VAS_COP_TYPE_GZIP_HIPRI: return "NX-GZIP High Priority";
+ case VAS_COP_TYPE_FTW: return "Fast Thread-wakeup";
+ default: return "Unknown";
+ }
+}
+
+static int info_dbg_show(struct seq_file *s, void *private)
+{
+ struct vas_window *window = s->private;
+
+ mutex_lock(&vas_mutex);
+
+ /* ensure window is not unmapped */
+ if (!window->hvwc_map)
+ goto unlock;
+
+ seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop),
+ window->tx_win ? "Send" : "Receive");
+ seq_printf(s, "Pid : %d\n", window->pid);
+
+unlock:
+ mutex_unlock(&vas_mutex);
+ return 0;
+}
+
+static int info_dbg_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, info_dbg_show, inode->i_private);
+}
+
+static const struct file_operations info_fops = {
+ .open = info_dbg_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static inline void print_reg(struct seq_file *s, struct vas_window *win,
+ char *name, u32 reg)
+{
+ seq_printf(s, "0x%016llx %s\n", read_hvwc_reg(win, name, reg), name);
+}
+
+static int hvwc_dbg_show(struct seq_file *s, void *private)
+{
+ struct vas_window *window = s->private;
+
+ mutex_lock(&vas_mutex);
+
+ /* ensure window is not unmapped */
+ if (!window->hvwc_map)
+ goto unlock;
+
+ print_reg(s, window, VREG(LPID));
+ print_reg(s, window, VREG(PID));
+ print_reg(s, window, VREG(XLATE_MSR));
+ print_reg(s, window, VREG(XLATE_LPCR));
+ print_reg(s, window, VREG(XLATE_CTL));
+ print_reg(s, window, VREG(AMR));
+ print_reg(s, window, VREG(SEIDR));
+ print_reg(s, window, VREG(FAULT_TX_WIN));
+ print_reg(s, window, VREG(OSU_INTR_SRC_RA));
+ print_reg(s, window, VREG(HV_INTR_SRC_RA));
+ print_reg(s, window, VREG(PSWID));
+ print_reg(s, window, VREG(LFIFO_BAR));
+ print_reg(s, window, VREG(LDATA_STAMP_CTL));
+ print_reg(s, window, VREG(LDMA_CACHE_CTL));
+ print_reg(s, window, VREG(LRFIFO_PUSH));
+ print_reg(s, window, VREG(CURR_MSG_COUNT));
+ print_reg(s, window, VREG(LNOTIFY_AFTER_COUNT));
+ print_reg(s, window, VREG(LRX_WCRED));
+ print_reg(s, window, VREG(LRX_WCRED_ADDER));
+ print_reg(s, window, VREG(TX_WCRED));
+ print_reg(s, window, VREG(TX_WCRED_ADDER));
+ print_reg(s, window, VREG(LFIFO_SIZE));
+ print_reg(s, window, VREG(WINCTL));
+ print_reg(s, window, VREG(WIN_STATUS));
+ print_reg(s, window, VREG(WIN_CTX_CACHING_CTL));
+ print_reg(s, window, VREG(TX_RSVD_BUF_COUNT));
+ print_reg(s, window, VREG(LRFIFO_WIN_PTR));
+ print_reg(s, window, VREG(LNOTIFY_CTL));
+ print_reg(s, window, VREG(LNOTIFY_PID));
+ print_reg(s, window, VREG(LNOTIFY_LPID));
+ print_reg(s, window, VREG(LNOTIFY_TID));
+ print_reg(s, window, VREG(LNOTIFY_SCOPE));
+ print_reg(s, window, VREG(NX_UTIL_ADDER));
+unlock:
+ mutex_unlock(&vas_mutex);
+ return 0;
+}
+
+static int hvwc_dbg_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, hvwc_dbg_show, inode->i_private);
+}
+
+static const struct file_operations hvwc_fops = {
+ .open = hvwc_dbg_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void vas_window_free_dbgdir(struct vas_window *window)
+{
+ if (window->dbgdir) {
+ debugfs_remove_recursive(window->dbgdir);
+ kfree(window->dbgname);
+ window->dbgdir = NULL;
+ window->dbgname = NULL;
+ }
+}
+
+void vas_window_init_dbgdir(struct vas_window *window)
+{
+ struct dentry *f, *d;
+
+ if (!window->vinst->dbgdir)
+ return;
+
+ window->dbgname = kzalloc(16, GFP_KERNEL);
+ if (!window->dbgname)
+ return;
+
+ snprintf(window->dbgname, 16, "w%d", window->winid);
+
+ d = debugfs_create_dir(window->dbgname, window->vinst->dbgdir);
+ if (IS_ERR(d))
+ goto free_name;
+
+ window->dbgdir = d;
+
+ f = debugfs_create_file("info", 0444, d, window, &info_fops);
+ if (IS_ERR(f))
+ goto remove_dir;
+
+ f = debugfs_create_file("hvwc", 0444, d, window, &hvwc_fops);
+ if (IS_ERR(f))
+ goto remove_dir;
+
+ return;
+
+free_name:
+ kfree(window->dbgname);
+ window->dbgname = NULL;
+
+remove_dir:
+ debugfs_remove_recursive(window->dbgdir);
+ window->dbgdir = NULL;
+}
+
+void vas_instance_init_dbgdir(struct vas_instance *vinst)
+{
+ struct dentry *d;
+
+ if (!vas_debugfs)
+ return;
+
+ vinst->dbgname = kzalloc(16, GFP_KERNEL);
+ if (!vinst->dbgname)
+ return;
+
+ snprintf(vinst->dbgname, 16, "v%d", vinst->vas_id);
+
+ d = debugfs_create_dir(vinst->dbgname, vas_debugfs);
+ if (IS_ERR(d))
+ goto free_name;
+
+ vinst->dbgdir = d;
+ return;
+
+free_name:
+ kfree(vinst->dbgname);
+ vinst->dbgname = NULL;
+ vinst->dbgdir = NULL;
+}
+
+void vas_init_dbgdir(void)
+{
+ vas_debugfs = debugfs_create_dir("vas", NULL);
+ if (IS_ERR(vas_debugfs))
+ vas_debugfs = NULL;
+}
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index 5aae845b8cd9..2b3eb01ab110 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -16,7 +16,8 @@
#include <linux/log2.h>
#include <linux/rcupdate.h>
#include <linux/cred.h>
-
+#include <asm/switch_to.h>
+#include <asm/ppc-opcode.h>
#include "vas.h"
#include "copy-paste.h"
@@ -40,6 +41,16 @@ static void compute_paste_address(struct vas_window *window, u64 *addr, int *len
pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
}
+u64 vas_win_paste_addr(struct vas_window *win)
+{
+ u64 addr;
+
+ compute_paste_address(win, &addr, NULL);
+
+ return addr;
+}
+EXPORT_SYMBOL(vas_win_paste_addr);
+
static inline void get_hvwc_mmio_bar(struct vas_window *window,
u64 *start, int *len)
{
@@ -145,23 +156,37 @@ static void unmap_paste_region(struct vas_window *window)
}
/*
- * Unmap the MMIO regions for a window.
+ * Unmap the MMIO regions for a window. Hold the vas_mutex so we don't
+ * unmap when the window's debugfs dir is in use. This serializes close
+ * of a window even on another VAS instance but since its not a critical
+ * path, just minimize the time we hold the mutex for now. We can add
+ * a per-instance mutex later if necessary.
*/
static void unmap_winctx_mmio_bars(struct vas_window *window)
{
int len;
+ void *uwc_map;
+ void *hvwc_map;
u64 busaddr_start;
- if (window->hvwc_map) {
+ mutex_lock(&vas_mutex);
+
+ hvwc_map = window->hvwc_map;
+ window->hvwc_map = NULL;
+
+ uwc_map = window->uwc_map;
+ window->uwc_map = NULL;
+
+ mutex_unlock(&vas_mutex);
+
+ if (hvwc_map) {
get_hvwc_mmio_bar(window, &busaddr_start, &len);
- unmap_region(window->hvwc_map, busaddr_start, len);
- window->hvwc_map = NULL;
+ unmap_region(hvwc_map, busaddr_start, len);
}
- if (window->uwc_map) {
+ if (uwc_map) {
get_uwc_mmio_bar(window, &busaddr_start, &len);
- unmap_region(window->uwc_map, busaddr_start, len);
- window->uwc_map = NULL;
+ unmap_region(uwc_map, busaddr_start, len);
}
}
@@ -528,6 +553,9 @@ static void vas_window_free(struct vas_window *window)
struct vas_instance *vinst = window->vinst;
unmap_winctx_mmio_bars(window);
+
+ vas_window_free_dbgdir(window);
+
kfree(window);
vas_release_window_id(&vinst->ida, winid);
@@ -552,6 +580,8 @@ static struct vas_window *vas_window_alloc(struct vas_instance *vinst)
if (map_winctx_mmio_bars(window))
goto out_free;
+ vas_window_init_dbgdir(window);
+
return window;
out_free:
@@ -569,6 +599,32 @@ static void put_rx_win(struct vas_window *rxwin)
}
/*
+ * Find the user space receive window given the @pswid.
+ * - We must have a valid vasid and it must belong to this instance.
+ * (so both send and receive windows are on the same VAS instance)
+ * - The window must refer to an OPEN, FTW, RECEIVE window.
+ *
+ * NOTE: We access ->windows[] table and assume that vinst->mutex is held.
+ */
+static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid)
+{
+ int vasid, winid;
+ struct vas_window *rxwin;
+
+ decode_pswid(pswid, &vasid, &winid);
+
+ if (vinst->vas_id != vasid)
+ return ERR_PTR(-EINVAL);
+
+ rxwin = vinst->windows[winid];
+
+ if (!rxwin || rxwin->tx_win || rxwin->cop != VAS_COP_TYPE_FTW)
+ return ERR_PTR(-EINVAL);
+
+ return rxwin;
+}
+
+/*
* Get the VAS receive window associated with NX engine identified
* by @cop and if applicable, @pswid.
*
@@ -581,10 +637,10 @@ static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst,
mutex_lock(&vinst->mutex);
- if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI)
- rxwin = vinst->rxwin[cop] ?: ERR_PTR(-EINVAL);
+ if (cop == VAS_COP_TYPE_FTW)
+ rxwin = get_user_rxwin(vinst, pswid);
else
- rxwin = ERR_PTR(-EINVAL);
+ rxwin = vinst->rxwin[cop] ?: ERR_PTR(-EINVAL);
if (!IS_ERR(rxwin))
atomic_inc(&rxwin->num_txwins);
@@ -674,15 +730,18 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin,
winctx->rx_fifo = rxattr->rx_fifo;
winctx->rx_fifo_size = rxattr->rx_fifo_size;
- winctx->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+ winctx->wcreds_max = rxwin->wcreds_max;
winctx->pin_win = rxattr->pin_win;
winctx->nx_win = rxattr->nx_win;
winctx->fault_win = rxattr->fault_win;
+ winctx->user_win = rxattr->user_win;
+ winctx->rej_no_credit = rxattr->rej_no_credit;
winctx->rx_word_mode = rxattr->rx_win_ord_mode;
winctx->tx_word_mode = rxattr->tx_win_ord_mode;
winctx->rx_wcred_mode = rxattr->rx_wcred_mode;
winctx->tx_wcred_mode = rxattr->tx_wcred_mode;
+ winctx->notify_early = rxattr->notify_early;
if (winctx->nx_win) {
winctx->data_stamp = true;
@@ -723,7 +782,10 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin,
static bool rx_win_args_valid(enum vas_cop_type cop,
struct vas_rx_win_attr *attr)
{
- dump_rx_win_attr(attr);
+ pr_debug("Rxattr: fault %d, notify %d, intr %d, early %d, fifo %d\n",
+ attr->fault_win, attr->notify_disable,
+ attr->intr_disable, attr->notify_early,
+ attr->rx_fifo_size);
if (cop >= VAS_COP_TYPE_MAX)
return false;
@@ -735,6 +797,9 @@ static bool rx_win_args_valid(enum vas_cop_type cop,
if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX)
return false;
+ if (attr->wcreds_max > VAS_RX_WCREDS_MAX)
+ return false;
+
if (attr->nx_win) {
/* cannot be fault or user window if it is nx */
if (attr->fault_win || attr->user_win)
@@ -835,6 +900,7 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
rxwin->nx_win = rxattr->nx_win;
rxwin->user_win = rxattr->user_win;
rxwin->cop = cop;
+ rxwin->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
if (rxattr->user_win)
rxwin->pid = task_pid_vnr(current);
@@ -884,21 +950,23 @@ static void init_winctx_for_txwin(struct vas_window *txwin,
*/
memset(winctx, 0, sizeof(struct vas_winctx));
- winctx->wcreds_max = txattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+ winctx->wcreds_max = txwin->wcreds_max;
winctx->user_win = txattr->user_win;
winctx->nx_win = txwin->rxwin->nx_win;
winctx->pin_win = txattr->pin_win;
+ winctx->rej_no_credit = txattr->rej_no_credit;
+ winctx->rsvd_txbuf_enable = txattr->rsvd_txbuf_enable;
winctx->rx_wcred_mode = txattr->rx_wcred_mode;
winctx->tx_wcred_mode = txattr->tx_wcred_mode;
winctx->rx_word_mode = txattr->rx_win_ord_mode;
winctx->tx_word_mode = txattr->tx_win_ord_mode;
+ winctx->rsvd_txbuf_count = txattr->rsvd_txbuf_count;
- if (winctx->nx_win) {
+ winctx->intr_disable = true;
+ if (winctx->nx_win)
winctx->data_stamp = true;
- winctx->intr_disable = true;
- }
winctx->lpid = txattr->lpid;
winctx->pidr = txattr->pidr;
@@ -921,6 +989,9 @@ static bool tx_win_args_valid(enum vas_cop_type cop,
if (cop > VAS_COP_TYPE_MAX)
return false;
+ if (attr->wcreds_max > VAS_TX_WCREDS_MAX)
+ return false;
+
if (attr->user_win &&
(cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count))
return false;
@@ -940,6 +1011,14 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
if (!tx_win_args_valid(cop, attr))
return ERR_PTR(-EINVAL);
+ /*
+ * If caller did not specify a vasid but specified the PSWID of a
+ * receive window (applicable only to FTW windows), use the vasid
+ * from that receive window.
+ */
+ if (vasid == -1 && attr->pswid)
+ decode_pswid(attr->pswid, &vasid, NULL);
+
vinst = find_vas_instance(vasid);
if (!vinst) {
pr_devel("vasid %d not found!\n", vasid);
@@ -958,11 +1037,13 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
goto put_rxwin;
}
+ txwin->cop = cop;
txwin->tx_win = 1;
txwin->rxwin = rxwin;
txwin->nx_win = txwin->rxwin->nx_win;
txwin->pid = attr->pid;
txwin->user_win = attr->user_win;
+ txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;
init_winctx_for_txwin(txwin, attr, &winctx);
@@ -984,6 +1065,14 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
}
}
+ /*
+ * Now that we have a send window, ensure context switch issues
+ * CP_ABORT for this thread.
+ */
+ rc = -EINVAL;
+ if (set_thread_uses_vas() < 0)
+ goto free_window;
+
set_vinst_win(vinst, txwin);
return txwin;
@@ -1038,50 +1127,110 @@ int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
else
rc = -EINVAL;
- print_fifo_msg_count(txwin);
+ pr_debug("Txwin #%d: Msg count %llu\n", txwin->winid,
+ read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));
return rc;
}
EXPORT_SYMBOL_GPL(vas_paste_crb);
+/*
+ * If credit checking is enabled for this window, poll for the return
+ * of window credits (i.e for NX engines to process any outstanding CRBs).
+ * Since NX-842 waits for the CRBs to be processed before closing the
+ * window, we should not have to wait for too long.
+ *
+ * TODO: We retry in 10ms intervals now. We could/should probably peek at
+ * the VAS_LRFIFO_PUSH_OFFSET register to get an estimate of pending
+ * CRBs on the FIFO and compute the delay dynamically on each retry.
+ * But that is not really needed until we support NX-GZIP access from
+ * user space. (NX-842 driver waits for CSB and Fast thread-wakeup
+ * doesn't use credit checking).
+ */
+static void poll_window_credits(struct vas_window *window)
+{
+ u64 val;
+ int creds, mode;
+
+ val = read_hvwc_reg(window, VREG(WINCTL));
+ if (window->tx_win)
+ mode = GET_FIELD(VAS_WINCTL_TX_WCRED_MODE, val);
+ else
+ mode = GET_FIELD(VAS_WINCTL_RX_WCRED_MODE, val);
+
+ if (!mode)
+ return;
+retry:
+ if (window->tx_win) {
+ val = read_hvwc_reg(window, VREG(TX_WCRED));
+ creds = GET_FIELD(VAS_TX_WCRED, val);
+ } else {
+ val = read_hvwc_reg(window, VREG(LRX_WCRED));
+ creds = GET_FIELD(VAS_LRX_WCRED, val);
+ }
+
+ if (creds < window->wcreds_max) {
+ val = 0;
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(msecs_to_jiffies(10));
+ goto retry;
+ }
+}
+
+/*
+ * Wait for the window to go to "not-busy" state. It should only take a
+ * short time to queue a CRB, so window should not be busy for too long.
+ * Trying 5ms intervals.
+ */
static void poll_window_busy_state(struct vas_window *window)
{
int busy;
u64 val;
retry:
- /*
- * Poll Window Busy flag
- */
val = read_hvwc_reg(window, VREG(WIN_STATUS));
busy = GET_FIELD(VAS_WIN_BUSY, val);
if (busy) {
val = 0;
set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
+ schedule_timeout(msecs_to_jiffies(5));
goto retry;
}
}
+/*
+ * Have the hardware cast a window out of cache and wait for it to
+ * be completed.
+ *
+ * NOTE: It can take a relatively long time to cast the window context
+ * out of the cache. It is not strictly necessary to cast out if:
+ *
+ * - we clear the "Pin Window" bit (so hardware is free to evict)
+ *
+ * - we re-initialize the window context when it is reassigned.
+ *
+ * We do the former in vas_win_close() and latter in vas_win_open().
+ * So, ignoring the cast-out for now. We can add it as needed. If
+ * casting out becomes necessary we should consider offloading the
+ * job to a worker thread, so the window close can proceed quickly.
+ */
static void poll_window_castout(struct vas_window *window)
{
- int cached;
- u64 val;
+ /* stub for now */
+}
- /* Cast window context out of the cache */
-retry:
- val = read_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL));
- cached = GET_FIELD(VAS_WIN_CACHE_STATUS, val);
- if (cached) {
- val = 0ULL;
- val = SET_FIELD(VAS_CASTOUT_REQ, val, 1);
- val = SET_FIELD(VAS_PUSH_TO_MEM, val, 0);
- write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val);
+/*
+ * Unpin and close a window so no new requests are accepted and the
+ * hardware can evict this window from cache if necessary.
+ */
+static void unpin_close_window(struct vas_window *window)
+{
+ u64 val;
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- goto retry;
- }
+ val = read_hvwc_reg(window, VREG(WINCTL));
+ val = SET_FIELD(VAS_WINCTL_PIN, val, 0);
+ val = SET_FIELD(VAS_WINCTL_OPEN, val, 0);
+ write_hvwc_reg(window, VREG(WINCTL), val);
}
/*
@@ -1098,8 +1247,6 @@ retry:
*/
int vas_win_close(struct vas_window *window)
{
- u64 val;
-
if (!window)
return 0;
@@ -1115,11 +1262,9 @@ int vas_win_close(struct vas_window *window)
poll_window_busy_state(window);
- /* Unpin window from cache and close it */
- val = read_hvwc_reg(window, VREG(WINCTL));
- val = SET_FIELD(VAS_WINCTL_PIN, val, 0);
- val = SET_FIELD(VAS_WINCTL_OPEN, val, 0);
- write_hvwc_reg(window, VREG(WINCTL), val);
+ unpin_close_window(window);
+
+ poll_window_credits(window);
poll_window_castout(window);
@@ -1132,3 +1277,12 @@ int vas_win_close(struct vas_window *window)
return 0;
}
EXPORT_SYMBOL_GPL(vas_win_close);
+
+/*
+ * Return a system-wide unique window id for the window @win.
+ */
+u32 vas_win_id(struct vas_window *win)
+{
+ return encode_pswid(win->vinst->vas_id, win->winid);
+}
+EXPORT_SYMBOL_GPL(vas_win_id);
diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c
index 565a4878fefa..c488621dbec3 100644
--- a/arch/powerpc/platforms/powernv/vas.c
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -18,15 +18,18 @@
#include <linux/of_platform.h>
#include <linux/of_address.h>
#include <linux/of.h>
+#include <asm/prom.h>
#include "vas.h"
-static DEFINE_MUTEX(vas_mutex);
+DEFINE_MUTEX(vas_mutex);
static LIST_HEAD(vas_instances);
+static DEFINE_PER_CPU(int, cpu_vas_id);
+
static int init_vas_instance(struct platform_device *pdev)
{
- int rc, vasid;
+ int rc, cpu, vasid;
struct resource *res;
struct vas_instance *vinst;
struct device_node *dn = pdev->dev.of_node;
@@ -74,10 +77,17 @@ static int init_vas_instance(struct platform_device *pdev)
"paste_win_id_shift 0x%llx\n", pdev->name, vasid,
vinst->paste_base_addr, vinst->paste_win_id_shift);
+ for_each_possible_cpu(cpu) {
+ if (cpu_to_chip_id(cpu) == of_get_ibm_chip_id(dn))
+ per_cpu(cpu_vas_id, cpu) = vasid;
+ }
+
mutex_lock(&vas_mutex);
list_add(&vinst->node, &vas_instances);
mutex_unlock(&vas_mutex);
+ vas_instance_init_dbgdir(vinst);
+
dev_set_drvdata(&pdev->dev, vinst);
return 0;
@@ -98,6 +108,10 @@ struct vas_instance *find_vas_instance(int vasid)
struct vas_instance *vinst;
mutex_lock(&vas_mutex);
+
+ if (vasid == -1)
+ vasid = per_cpu(cpu_vas_id, smp_processor_id());
+
list_for_each(ent, &vas_instances) {
vinst = list_entry(ent, struct vas_instance, node);
if (vinst->vas_id == vasid) {
@@ -111,6 +125,17 @@ struct vas_instance *find_vas_instance(int vasid)
return NULL;
}
+int chip_to_vas_id(int chipid)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ if (cpu_to_chip_id(cpu) == chipid)
+ return per_cpu(cpu_vas_id, cpu);
+ }
+ return -1;
+}
+
static int vas_probe(struct platform_device *pdev)
{
return init_vas_instance(pdev);
@@ -134,6 +159,8 @@ static int __init vas_init(void)
int found = 0;
struct device_node *dn;
+ vas_init_dbgdir();
+
platform_driver_register(&vas_driver);
for_each_compatible_node(dn, NULL, "ibm,vas") {
diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h
index 38dee5d50f31..ae0100fd35bb 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -13,6 +13,8 @@
#include <linux/idr.h>
#include <asm/vas.h>
#include <linux/io.h>
+#include <linux/dcache.h>
+#include <linux/mutex.h>
/*
* Overview of Virtual Accelerator Switchboard (VAS).
@@ -106,8 +108,8 @@
*
* TODO: Needs tuning for per-process credits
*/
-#define VAS_WCREDS_MIN 16
-#define VAS_WCREDS_MAX ((64 << 10) - 1)
+#define VAS_RX_WCREDS_MAX ((64 << 10) - 1)
+#define VAS_TX_WCREDS_MAX ((4 << 10) - 1)
#define VAS_WCREDS_DEFAULT (1 << 10)
/*
@@ -259,6 +261,16 @@
#define VAS_NX_UTIL_ADDER PPC_BITMASK(32, 63)
/*
+ * VREG(x):
+ * Expand a register's short name (eg: LPID) into two parameters:
+ * - the register's short name in string form ("LPID"), and
+ * - the name of the macro (eg: VAS_LPID_OFFSET), defining the
+ * register's offset in the window context
+ */
+#define VREG_SFX(n, s) __stringify(n), VAS_##n##s
+#define VREG(r) VREG_SFX(r, _OFFSET)
+
+/*
* Local Notify Scope Control Register. (Receive windows only).
*/
enum vas_notify_scope {
@@ -307,6 +319,9 @@ struct vas_instance {
struct mutex mutex;
struct vas_window *rxwin[VAS_COP_TYPE_MAX];
struct vas_window *windows[VAS_WINDOWS_PER_CHIP];
+
+ char *dbgname;
+ struct dentry *dbgdir;
};
/*
@@ -322,6 +337,10 @@ struct vas_window {
void *hvwc_map; /* HV window context */
void *uwc_map; /* OS/User window context */
pid_t pid; /* Linux process id of owner */
+ int wcreds_max; /* Window credits */
+
+ char *dbgname;
+ struct dentry *dbgdir;
/* Fields applicable only to send windows */
void *paste_kaddr;
@@ -383,45 +402,23 @@ struct vas_winctx {
enum vas_notify_after_count notify_after_count;
};
-extern struct vas_instance *find_vas_instance(int vasid);
+extern struct mutex vas_mutex;
-/*
- * VREG(x):
- * Expand a register's short name (eg: LPID) into two parameters:
- * - the register's short name in string form ("LPID"), and
- * - the name of the macro (eg: VAS_LPID_OFFSET), defining the
- * register's offset in the window context
- */
-#define VREG_SFX(n, s) __stringify(n), VAS_##n##s
-#define VREG(r) VREG_SFX(r, _OFFSET)
-
-#ifdef vas_debug
-static inline void dump_rx_win_attr(struct vas_rx_win_attr *attr)
-{
- pr_err("fault %d, notify %d, intr %d early %d\n",
- attr->fault_win, attr->notify_disable,
- attr->intr_disable, attr->notify_early);
-
- pr_err("rx_fifo_size %d, max value %d\n",
- attr->rx_fifo_size, VAS_RX_FIFO_SIZE_MAX);
-}
+extern struct vas_instance *find_vas_instance(int vasid);
+extern void vas_init_dbgdir(void);
+extern void vas_instance_init_dbgdir(struct vas_instance *vinst);
+extern void vas_window_init_dbgdir(struct vas_window *win);
+extern void vas_window_free_dbgdir(struct vas_window *win);
static inline void vas_log_write(struct vas_window *win, char *name,
void *regptr, u64 val)
{
if (val)
- pr_err("%swin #%d: %s reg %p, val 0x%016llx\n",
+ pr_debug("%swin #%d: %s reg %p, val 0x%016llx\n",
win->tx_win ? "Tx" : "Rx", win->winid, name,
regptr, val);
}
-#else /* vas_debug */
-
-#define vas_log_write(win, name, reg, val)
-#define dump_rx_win_attr(attr)
-
-#endif /* vas_debug */
-
static inline void write_uwc_reg(struct vas_window *win, char *name,
s32 reg, u64 val)
{
@@ -450,18 +447,32 @@ static inline u64 read_hvwc_reg(struct vas_window *win,
return in_be64(win->hvwc_map+reg);
}
-#ifdef vas_debug
-
-static void print_fifo_msg_count(struct vas_window *txwin)
+/*
+ * Encode/decode the Partition Send Window ID (PSWID) for a window in
+ * a way that we can uniquely identify any window in the system. i.e.
+ * we should be able to locate the 'struct vas_window' given the PSWID.
+ *
+ * Bits Usage
+ * 0:7 VAS id (8 bits)
+ * 8:15 Unused, 0 (3 bits)
+ * 16:31 Window id (16 bits)
+ */
+static inline u32 encode_pswid(int vasid, int winid)
{
- uint64_t read_hvwc_reg(struct vas_window *w, char *n, uint64_t o);
- pr_devel("Winid %d, Msg count %llu\n", txwin->winid,
- (uint64_t)read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));
-}
-#else /* vas_debug */
+ u32 pswid = 0;
-#define print_fifo_msg_count(window)
+ pswid |= vasid << (31 - 7);
+ pswid |= winid;
-#endif /* vas_debug */
+ return pswid;
+}
+
+static inline void decode_pswid(u32 pswid, int *vasid, int *winid)
+{
+ if (vasid)
+ *vasid = pswid >> (31 - 7) & 0xFF;
+ if (winid)
+ *winid = pswid & 0xFFFF;
+}
#endif /* _VAS_H */
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index fadb95efbb9e..a7d14aa7bb7c 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -363,6 +363,7 @@ static int dlpar_online_cpu(struct device_node *dn)
BUG_ON(get_cpu_current_state(cpu)
!= CPU_STATE_OFFLINE);
cpu_maps_update_done();
+ timed_topology_update(1);
rc = device_online(get_cpu_device(cpu));
if (rc)
goto out;
@@ -533,6 +534,7 @@ static int dlpar_offline_cpu(struct device_node *dn)
set_preferred_offline_state(cpu,
CPU_STATE_OFFLINE);
cpu_maps_update_done();
+ timed_topology_update(1);
rc = device_offline(get_cpu_device(cpu));
if (rc)
goto out;
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 7c181467d0ad..69921f72e2da 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -55,23 +55,23 @@
static struct iommu_table_group *iommu_pseries_alloc_group(int node)
{
- struct iommu_table_group *table_group = NULL;
- struct iommu_table *tbl = NULL;
- struct iommu_table_group_link *tgl = NULL;
+ struct iommu_table_group *table_group;
+ struct iommu_table *tbl;
+ struct iommu_table_group_link *tgl;
table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL,
node);
if (!table_group)
- goto fail_exit;
+ return NULL;
tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node);
if (!tbl)
- goto fail_exit;
+ goto free_group;
tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
node);
if (!tgl)
- goto fail_exit;
+ goto free_table;
INIT_LIST_HEAD_RCU(&tbl->it_group_list);
kref_init(&tbl->it_kref);
@@ -82,11 +82,10 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node)
return table_group;
-fail_exit:
- kfree(tgl);
- kfree(table_group);
+free_table:
kfree(tbl);
-
+free_group:
+ kfree(table_group);
return NULL;
}
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 495ba4e7336d..0ee4a469a4ae 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -93,7 +93,7 @@ void vpa_init(int cpu)
return;
}
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/*
* PAPR says this feature is SLB-Buffer but firmware never
* reports that. All SPLPAR support SLB shadow buffer.
@@ -106,7 +106,7 @@ void vpa_init(int cpu)
"cpu %d (hw %d) of area %lx failed with %ld\n",
cpu, hwcpu, addr, ret);
}
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
/*
* Register dispatch trace log, if one has been allocated.
@@ -129,7 +129,7 @@ void vpa_init(int cpu)
}
}
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long vpn, unsigned long pa,
@@ -824,7 +824,7 @@ void arch_free_page(struct page *page, int order)
EXPORT_SYMBOL(arch_free_page);
#endif /* CONFIG_PPC_SMLPAR */
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_TRACEPOINTS
#ifdef HAVE_JUMP_LABEL
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index 779fc2a1c8f7..b2706c483067 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -485,7 +485,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
seq_printf(m, "shared_processor_mode=%d\n",
lppaca_shared_proc(get_lppaca()));
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
seq_printf(m, "slb_size=%d\n", mmu_slb_size);
#endif
parse_em_data(m);
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 12277bc9fd9e..d86938260a86 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -1592,6 +1592,8 @@ ATTRIBUTE_GROUPS(vio_dev);
void vio_unregister_device(struct vio_dev *viodev)
{
device_unregister(&viodev->dev);
+ if (viodev->family == VDEVICE)
+ irq_dispose_mapping(viodev->irq);
}
EXPORT_SYMBOL(vio_unregister_device);
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index c60e84e4558d..1b307c80b401 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -184,7 +184,7 @@ static int axon_ram_probe(struct platform_device *device)
static int axon_ram_bank_id = -1;
struct axon_ram_bank *bank;
struct resource resource;
- int rc = 0;
+ int rc;
axon_ram_bank_id++;
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index 16f1edd78c40..535cf1f6941c 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -846,12 +846,12 @@ void ipic_disable_mcp(enum ipic_mcp_irq mcp_irq)
u32 ipic_get_mcp_status(void)
{
- return ipic_read(primary_ipic->regs, IPIC_SERMR);
+ return ipic_read(primary_ipic->regs, IPIC_SERSR);
}
void ipic_clear_mcp_status(u32 mask)
{
- ipic_write(primary_ipic->regs, IPIC_SERMR, mask);
+ ipic_write(primary_ipic->regs, IPIC_SERSR, mask);
}
/* Return an interrupt vector or 0 if no interrupt is pending. */
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 33351c6704b1..1b2d8cb49abb 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -28,6 +28,7 @@
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/ctype.h>
+#include <linux/highmem.h>
#include <asm/debugfs.h>
#include <asm/ptrace.h>
@@ -127,6 +128,7 @@ static void byterev(unsigned char *, int);
static void memex(void);
static int bsesc(void);
static void dump(void);
+static void show_pte(unsigned long);
static void prdump(unsigned long, long);
static int ppc_inst_dump(unsigned long, long, int);
static void dump_log_buf(void);
@@ -234,6 +236,7 @@ Commands:\n\
#endif
"\
dr dump stream of raw bytes\n\
+ dv dump virtual address translation \n\
dt dump the tracing buffers (uses printk)\n\
dtc dump the tracing buffers for current CPU (uses printk)\n\
"
@@ -278,6 +281,7 @@ Commands:\n\
#elif defined(CONFIG_44x) || defined(CONFIG_PPC_BOOK3E)
" u dump TLB\n"
#endif
+" U show uptime information\n"
" ? help\n"
" # n limit output to n lines per page (for dp, dpa, dl)\n"
" zr reboot\n\
@@ -530,14 +534,19 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
waiting:
secondary = 1;
+ spin_begin();
while (secondary && !xmon_gate) {
if (in_xmon == 0) {
- if (fromipi)
+ if (fromipi) {
+ spin_end();
goto leave;
+ }
secondary = test_and_set_bit(0, &in_xmon);
}
- barrier();
+ spin_cpu_relax();
+ touch_nmi_watchdog();
}
+ spin_end();
if (!secondary && !xmon_gate) {
/* we are the first cpu to come in */
@@ -568,21 +577,25 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
mb();
xmon_gate = 1;
barrier();
+ touch_nmi_watchdog();
}
cmdloop:
while (in_xmon) {
if (secondary) {
+ spin_begin();
if (cpu == xmon_owner) {
if (!test_and_set_bit(0, &xmon_taken)) {
secondary = 0;
+ spin_end();
continue;
}
/* missed it */
while (cpu == xmon_owner)
- barrier();
+ spin_cpu_relax();
}
- barrier();
+ spin_cpu_relax();
+ touch_nmi_watchdog();
} else {
cmd = cmds(regs);
if (cmd != 0) {
@@ -896,6 +909,26 @@ static void remove_cpu_bpts(void)
write_ciabr(0);
}
+/* Based on uptime_proc_show(). */
+static void
+show_uptime(void)
+{
+ struct timespec uptime;
+
+ if (setjmp(bus_error_jmp) == 0) {
+ catch_memory_errors = 1;
+ sync();
+
+ get_monotonic_boottime(&uptime);
+ printf("Uptime: %lu.%.2lu seconds\n", (unsigned long)uptime.tv_sec,
+ ((unsigned long)uptime.tv_nsec / (NSEC_PER_SEC/100)));
+
+ sync();
+ __delay(200); \
+ }
+ catch_memory_errors = 0;
+}
+
static void set_lpp_cmd(void)
{
unsigned long lpp;
@@ -1031,6 +1064,9 @@ cmds(struct pt_regs *excp)
dump_tlb_book3e();
break;
#endif
+ case 'U':
+ show_uptime();
+ break;
default:
printf("Unrecognized command: ");
do {
@@ -2279,7 +2315,7 @@ static void dump_tracing(void)
static void dump_one_paca(int cpu)
{
struct paca_struct *p;
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
int i = 0;
#endif
@@ -2320,7 +2356,7 @@ static void dump_one_paca(int cpu)
DUMP(p, hw_cpu_id, "x");
DUMP(p, cpu_start, "x");
DUMP(p, kexec_state, "x");
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
for (i = 0; i < SLB_NUM_BOLTED; i++) {
u64 esid, vsid;
@@ -2351,6 +2387,7 @@ static void dump_one_paca(int cpu)
#endif
DUMP(p, __current, "p");
DUMP(p, kstack, "lx");
+ printf(" kstack_base = 0x%016lx\n", p->kstack & ~(THREAD_SIZE - 1));
DUMP(p, stab_rr, "lx");
DUMP(p, saved_r1, "lx");
DUMP(p, trap_save, "x");
@@ -2475,6 +2512,11 @@ static void dump_xives(void)
unsigned long num;
int c;
+ if (!xive_enabled()) {
+ printf("Xive disabled on this system\n");
+ return;
+ }
+
c = inchar();
if (c == 'a') {
dump_all_xives();
@@ -2574,6 +2616,9 @@ dump(void)
dump_log_buf();
} else if (c == 'o') {
dump_opal_msglog();
+ } else if (c == 'v') {
+ /* dump virtual to physical translation */
+ show_pte(adrs);
} else if (c == 'r') {
scanhex(&ndump);
if (ndump == 0)
@@ -2907,6 +2952,116 @@ static void show_task(struct task_struct *tsk)
tsk->comm);
}
+#ifdef CONFIG_PPC_BOOK3S_64
+void format_pte(void *ptep, unsigned long pte)
+{
+ printf("ptep @ 0x%016lx = 0x%016lx\n", (unsigned long)ptep, pte);
+ printf("Maps physical address = 0x%016lx\n", pte & PTE_RPN_MASK);
+
+ printf("Flags = %s%s%s%s%s\n",
+ (pte & _PAGE_ACCESSED) ? "Accessed " : "",
+ (pte & _PAGE_DIRTY) ? "Dirty " : "",
+ (pte & _PAGE_READ) ? "Read " : "",
+ (pte & _PAGE_WRITE) ? "Write " : "",
+ (pte & _PAGE_EXEC) ? "Exec " : "");
+}
+
+static void show_pte(unsigned long addr)
+{
+ unsigned long tskv = 0;
+ struct task_struct *tsk = NULL;
+ struct mm_struct *mm;
+ pgd_t *pgdp, *pgdir;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ if (!scanhex(&tskv))
+ mm = &init_mm;
+ else
+ tsk = (struct task_struct *)tskv;
+
+ if (tsk == NULL)
+ mm = &init_mm;
+ else
+ mm = tsk->active_mm;
+
+ if (setjmp(bus_error_jmp) != 0) {
+ catch_memory_errors = 0;
+ printf("*** Error dumping pte for task %p\n", tsk);
+ return;
+ }
+
+ catch_memory_errors = 1;
+ sync();
+
+ if (mm == &init_mm) {
+ pgdp = pgd_offset_k(addr);
+ pgdir = pgd_offset_k(0);
+ } else {
+ pgdp = pgd_offset(mm, addr);
+ pgdir = pgd_offset(mm, 0);
+ }
+
+ if (pgd_none(*pgdp)) {
+ printf("no linux page table for address\n");
+ return;
+ }
+
+ printf("pgd @ 0x%016lx\n", pgdir);
+
+ if (pgd_huge(*pgdp)) {
+ format_pte(pgdp, pgd_val(*pgdp));
+ return;
+ }
+ printf("pgdp @ 0x%016lx = 0x%016lx\n", pgdp, pgd_val(*pgdp));
+
+ pudp = pud_offset(pgdp, addr);
+
+ if (pud_none(*pudp)) {
+ printf("No valid PUD\n");
+ return;
+ }
+
+ if (pud_huge(*pudp)) {
+ format_pte(pudp, pud_val(*pudp));
+ return;
+ }
+
+ printf("pudp @ 0x%016lx = 0x%016lx\n", pudp, pud_val(*pudp));
+
+ pmdp = pmd_offset(pudp, addr);
+
+ if (pmd_none(*pmdp)) {
+ printf("No valid PMD\n");
+ return;
+ }
+
+ if (pmd_huge(*pmdp)) {
+ format_pte(pmdp, pmd_val(*pmdp));
+ return;
+ }
+ printf("pmdp @ 0x%016lx = 0x%016lx\n", pmdp, pmd_val(*pmdp));
+
+ ptep = pte_offset_map(pmdp, addr);
+ if (pte_none(*ptep)) {
+ printf("no valid PTE\n");
+ return;
+ }
+
+ format_pte(ptep, pte_val(*ptep));
+
+ sync();
+ __delay(200);
+ catch_memory_errors = 0;
+}
+#else
+static void show_pte(unsigned long addr)
+{
+ printf("show_pte not yet implemented\n");
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
static void show_tasks(void)
{
unsigned long tskv;
@@ -3224,7 +3379,7 @@ static void xmon_print_symbol(unsigned long address, const char *mid,
printf("%s", after);
}
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
void dump_segments(void)
{
int i;