summaryrefslogtreecommitdiffstats
path: root/kernel/arch/s390
diff options
context:
space:
mode:
authorJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-11 10:41:07 +0300
committerJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-13 08:17:18 +0300
commite09b41010ba33a20a87472ee821fa407a5b8da36 (patch)
treed10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/arch/s390
parentf93b97fd65072de626c074dbe099a1fff05ce060 (diff)
These changes are the raw update to linux-4.4.6-rt14. Kernel sources
are taken from kernel.org, and rt patch from the rt wiki download page. During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
Diffstat (limited to 'kernel/arch/s390')
-rw-r--r--kernel/arch/s390/Kbuild1
-rw-r--r--kernel/arch/s390/Kconfig79
-rw-r--r--kernel/arch/s390/Makefile2
-rw-r--r--kernel/arch/s390/boot/compressed/Makefile2
-rw-r--r--kernel/arch/s390/configs/default_defconfig19
-rw-r--r--kernel/arch/s390/configs/gcov_defconfig17
-rw-r--r--kernel/arch/s390/configs/performance_defconfig21
-rw-r--r--kernel/arch/s390/configs/zfcpdump_defconfig5
-rw-r--r--kernel/arch/s390/crypto/aes_s390.c3
-rw-r--r--kernel/arch/s390/crypto/des_s390.c3
-rw-r--r--kernel/arch/s390/crypto/ghash_s390.c3
-rw-r--r--kernel/arch/s390/crypto/prng.c4
-rw-r--r--kernel/arch/s390/crypto/sha.h2
-rw-r--r--kernel/arch/s390/crypto/sha1_s390.c3
-rw-r--r--kernel/arch/s390/crypto/sha256_s390.c3
-rw-r--r--kernel/arch/s390/crypto/sha512_s390.c3
-rw-r--r--kernel/arch/s390/defconfig12
-rw-r--r--kernel/arch/s390/hypfs/hypfs_diag.c10
-rw-r--r--kernel/arch/s390/hypfs/hypfs_diag0c.c2
-rw-r--r--kernel/arch/s390/hypfs/hypfs_sprp.c13
-rw-r--r--kernel/arch/s390/hypfs/hypfs_vm.c2
-rw-r--r--kernel/arch/s390/hypfs/inode.c7
-rw-r--r--kernel/arch/s390/include/asm/Kbuild3
-rw-r--r--kernel/arch/s390/include/asm/appldata.h2
-rw-r--r--kernel/arch/s390/include/asm/atomic.h43
-rw-r--r--kernel/arch/s390/include/asm/barrier.h14
-rw-r--r--kernel/arch/s390/include/asm/bitops.h55
-rw-r--r--kernel/arch/s390/include/asm/cio.h11
-rw-r--r--kernel/arch/s390/include/asm/cmb.h1
-rw-r--r--kernel/arch/s390/include/asm/cmpxchg.h32
-rw-r--r--kernel/arch/s390/include/asm/cpu.h2
-rw-r--r--kernel/arch/s390/include/asm/cpu_mf.h5
-rw-r--r--kernel/arch/s390/include/asm/cpufeature.h29
-rw-r--r--kernel/arch/s390/include/asm/diag.h29
-rw-r--r--kernel/arch/s390/include/asm/dma-mapping.h55
-rw-r--r--kernel/arch/s390/include/asm/elf.h13
-rw-r--r--kernel/arch/s390/include/asm/etr.h13
-rw-r--r--kernel/arch/s390/include/asm/fpu/api.h30
-rw-r--r--kernel/arch/s390/include/asm/fpu/internal.h69
-rw-r--r--kernel/arch/s390/include/asm/fpu/types.h25
-rw-r--r--kernel/arch/s390/include/asm/hugetlb.h5
-rw-r--r--kernel/arch/s390/include/asm/idle.h2
-rw-r--r--kernel/arch/s390/include/asm/io.h3
-rw-r--r--kernel/arch/s390/include/asm/ipl.h4
-rw-r--r--kernel/arch/s390/include/asm/irq.h14
-rw-r--r--kernel/arch/s390/include/asm/jump_label.h19
-rw-r--r--kernel/arch/s390/include/asm/kvm_host.h21
-rw-r--r--kernel/arch/s390/include/asm/kvm_para.h67
-rw-r--r--kernel/arch/s390/include/asm/linkage.h22
-rw-r--r--kernel/arch/s390/include/asm/lowcore.h11
-rw-r--r--kernel/arch/s390/include/asm/mmu_context.h16
-rw-r--r--kernel/arch/s390/include/asm/mmzone.h16
-rw-r--r--kernel/arch/s390/include/asm/nmi.h98
-rw-r--r--kernel/arch/s390/include/asm/numa.h35
-rw-r--r--kernel/arch/s390/include/asm/pci.h26
-rw-r--r--kernel/arch/s390/include/asm/pci_dma.h5
-rw-r--r--kernel/arch/s390/include/asm/perf_event.h8
-rw-r--r--kernel/arch/s390/include/asm/pgalloc.h24
-rw-r--r--kernel/arch/s390/include/asm/pgtable.h113
-rw-r--r--kernel/arch/s390/include/asm/processor.h129
-rw-r--r--kernel/arch/s390/include/asm/ptrace.h11
-rw-r--r--kernel/arch/s390/include/asm/sclp.h50
-rw-r--r--kernel/arch/s390/include/asm/setup.h50
-rw-r--r--kernel/arch/s390/include/asm/smp.h2
-rw-r--r--kernel/arch/s390/include/asm/spinlock.h3
-rw-r--r--kernel/arch/s390/include/asm/switch_to.h135
-rw-r--r--kernel/arch/s390/include/asm/thread_info.h22
-rw-r--r--kernel/arch/s390/include/asm/timex.h5
-rw-r--r--kernel/arch/s390/include/asm/topology.h42
-rw-r--r--kernel/arch/s390/include/asm/trace/diag.h43
-rw-r--r--kernel/arch/s390/include/asm/unistd.h12
-rw-r--r--kernel/arch/s390/include/asm/vx-insn.h480
-rw-r--r--kernel/arch/s390/include/uapi/asm/unistd.h48
-rw-r--r--kernel/arch/s390/kernel/Makefile13
-rw-r--r--kernel/arch/s390/kernel/asm-offsets.c288
-rw-r--r--kernel/arch/s390/kernel/base.S21
-rw-r--r--kernel/arch/s390/kernel/compat_signal.c78
-rw-r--r--kernel/arch/s390/kernel/compat_wrapper.c73
-rw-r--r--kernel/arch/s390/kernel/cpcmd.c2
-rw-r--r--kernel/arch/s390/kernel/crash_dump.c48
-rw-r--r--kernel/arch/s390/kernel/debug.c11
-rw-r--r--kernel/arch/s390/kernel/diag.c134
-rw-r--r--kernel/arch/s390/kernel/dis.c17
-rw-r--r--kernel/arch/s390/kernel/early.c15
-rw-r--r--kernel/arch/s390/kernel/entry.S675
-rw-r--r--kernel/arch/s390/kernel/entry.h3
-rw-r--r--kernel/arch/s390/kernel/head.S100
-rw-r--r--kernel/arch/s390/kernel/head64.S7
-rw-r--r--kernel/arch/s390/kernel/ipl.c74
-rw-r--r--kernel/arch/s390/kernel/irq.c1
-rw-r--r--kernel/arch/s390/kernel/jump_label.c11
-rw-r--r--kernel/arch/s390/kernel/nmi.c129
-rw-r--r--kernel/arch/s390/kernel/perf_cpum_cf.c47
-rw-r--r--kernel/arch/s390/kernel/perf_cpum_sf.c15
-rw-r--r--kernel/arch/s390/kernel/process.c61
-rw-r--r--kernel/arch/s390/kernel/processor.c14
-rw-r--r--kernel/arch/s390/kernel/ptrace.c192
-rw-r--r--kernel/arch/s390/kernel/runtime_instr.c64
-rw-r--r--kernel/arch/s390/kernel/s390_ksyms.c2
-rw-r--r--kernel/arch/s390/kernel/sclp.S355
-rw-r--r--kernel/arch/s390/kernel/sclp.c160
-rw-r--r--kernel/arch/s390/kernel/setup.c35
-rw-r--r--kernel/arch/s390/kernel/signal.c48
-rw-r--r--kernel/arch/s390/kernel/smp.c169
-rw-r--r--kernel/arch/s390/kernel/suspend.c2
-rw-r--r--kernel/arch/s390/kernel/swsusp.S38
-rw-r--r--kernel/arch/s390/kernel/syscalls.S132
-rw-r--r--kernel/arch/s390/kernel/time.c63
-rw-r--r--kernel/arch/s390/kernel/topology.c59
-rw-r--r--kernel/arch/s390/kernel/trace.c29
-rw-r--r--kernel/arch/s390/kernel/traps.c55
-rw-r--r--kernel/arch/s390/kernel/vdso.c2
-rw-r--r--kernel/arch/s390/kernel/vdso32/Makefile2
-rw-r--r--kernel/arch/s390/kernel/vdso64/Makefile2
-rw-r--r--kernel/arch/s390/kernel/vtime.c70
-rw-r--r--kernel/arch/s390/kvm/diag.c13
-rw-r--r--kernel/arch/s390/kvm/guestdbg.c35
-rw-r--r--kernel/arch/s390/kvm/intercept.c58
-rw-r--r--kernel/arch/s390/kvm/interrupt.c320
-rw-r--r--kernel/arch/s390/kvm/kvm-s390.c397
-rw-r--r--kernel/arch/s390/kvm/kvm-s390.h71
-rw-r--r--kernel/arch/s390/kvm/priv.c53
-rw-r--r--kernel/arch/s390/kvm/sigp.c21
-rw-r--r--kernel/arch/s390/kvm/trace-s390.h33
-rw-r--r--kernel/arch/s390/lib/delay.c31
-rw-r--r--kernel/arch/s390/lib/find.c4
-rw-r--r--kernel/arch/s390/lib/spinlock.c4
-rw-r--r--kernel/arch/s390/lib/uaccess.c27
-rw-r--r--kernel/arch/s390/mm/extable.c8
-rw-r--r--kernel/arch/s390/mm/extmem.c3
-rw-r--r--kernel/arch/s390/mm/fault.c7
-rw-r--r--kernel/arch/s390/mm/gup.c10
-rw-r--r--kernel/arch/s390/mm/hugetlbpage.c72
-rw-r--r--kernel/arch/s390/mm/init.c74
-rw-r--r--kernel/arch/s390/mm/mem_detect.c4
-rw-r--r--kernel/arch/s390/mm/mmap.c60
-rw-r--r--kernel/arch/s390/mm/pgtable.c225
-rw-r--r--kernel/arch/s390/net/bpf_jit.h15
-rw-r--r--kernel/arch/s390/net/bpf_jit_comp.c197
-rw-r--r--kernel/arch/s390/numa/Makefile3
-rw-r--r--kernel/arch/s390/numa/mode_emu.c536
-rw-r--r--kernel/arch/s390/numa/numa.c184
-rw-r--r--kernel/arch/s390/numa/numa_mode.h24
-rw-r--r--kernel/arch/s390/numa/toptree.c342
-rw-r--r--kernel/arch/s390/numa/toptree.h60
-rw-r--r--kernel/arch/s390/oprofile/init.c1
-rw-r--r--kernel/arch/s390/pci/pci.c40
-rw-r--r--kernel/arch/s390/pci/pci_dma.c121
-rw-r--r--kernel/arch/s390/pci/pci_event.c20
-rw-r--r--kernel/arch/s390/pci/pci_insn.c33
-rw-r--r--kernel/arch/s390/pci/pci_sysfs.c17
151 files changed, 5548 insertions, 3079 deletions
diff --git a/kernel/arch/s390/Kbuild b/kernel/arch/s390/Kbuild
index 2938934c6..e256592eb 100644
--- a/kernel/arch/s390/Kbuild
+++ b/kernel/arch/s390/Kbuild
@@ -6,3 +6,4 @@ obj-$(CONFIG_S390_HYPFS_FS) += hypfs/
obj-$(CONFIG_APPLDATA_BASE) += appldata/
obj-y += net/
obj-$(CONFIG_PCI) += pci/
+obj-$(CONFIG_NUMA) += numa/
diff --git a/kernel/arch/s390/Kconfig b/kernel/arch/s390/Kconfig
index b06dc3839..3a55f493c 100644
--- a/kernel/arch/s390/Kconfig
+++ b/kernel/arch/s390/Kconfig
@@ -48,6 +48,7 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
config KEXEC
def_bool y
+ select KEXEC_CORE
config AUDIT_ARCH
def_bool y
@@ -99,20 +100,26 @@ config S390
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
select ARCH_SAVE_PAGE_KEYS if HIBERNATION
select ARCH_SUPPORTS_ATOMIC_RMW
+ select ARCH_SUPPORTS_NUMA_BALANCING
+ select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
+ select ARCH_WANTS_PROT_NUMA_PROT_NONE
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_EXTABLE_SORT
select CLONE_BACKWARDS2
select DYNAMIC_FTRACE if FUNCTION_TRACER
select GENERIC_CLOCKEVENTS
+ select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_DEVICES if !SMP
select GENERIC_FIND_FIRST_BIT
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_EARLY_PFN_TO_NID
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_ARCH_SOFT_DIRTY
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
@@ -153,6 +160,7 @@ config S390
select VIRT_CPU_ACCOUNTING
select VIRT_TO_BUS
+
config SCHED_OMIT_FRAME_POINTER
def_bool y
@@ -385,6 +393,76 @@ config HOTPLUG_CPU
config SCHED_SMT
def_bool n
+# Some NUMA nodes have memory ranges that span
+# other nodes. Even though a pfn is valid and
+# between a node's start and end pfns, it may not
+# reside on that node. See memmap_init_zone()
+# for details. <- They meant memory holes!
+config NODES_SPAN_OTHER_NODES
+ def_bool NUMA
+
+config NUMA
+ bool "NUMA support"
+ depends on SMP && 64BIT && SCHED_TOPOLOGY
+ default n
+ help
+ Enable NUMA support
+
+ This option adds NUMA support to the kernel.
+
+ An operation mode can be selected by appending
+ numa=<method> to the kernel command line.
+
+ The default behaviour is identical to appending numa=plain to
+ the command line. This will create just one node with all
+ available memory and all CPUs in it.
+
+config NODES_SHIFT
+ int "Maximum NUMA nodes (as a power of 2)"
+ range 1 10
+ depends on NUMA
+ default "4"
+ help
+ Specify the maximum number of NUMA nodes available on the target
+ system. Increases memory reserved to accommodate various tables.
+
+menu "Select NUMA modes"
+ depends on NUMA
+
+config NUMA_EMU
+ bool "NUMA emulation"
+ default y
+ help
+ Numa emulation mode will split the available system memory into
+ equal chunks which then are distributed over the configured number
+ of nodes in a round-robin manner.
+
+ The number of fake nodes is limited by the number of available memory
+ chunks (i.e. memory size / fake size) and the number of supported
+ nodes in the kernel.
+
+ The CPUs are assigned to the nodes in a way that partially respects
+ the original machine topology (if supported by the machine).
+ Fair distribution of the CPUs is not guaranteed.
+
+config EMU_SIZE
+ hex "NUMA emulation memory chunk size"
+ default 0x10000000
+ range 0x400000 0x100000000
+ depends on NUMA_EMU
+ help
+ Select the default size by which the memory is chopped and then
+ assigned to emulated NUMA nodes.
+
+ This can be overridden by specifying
+
+ emu_size=<n>
+
+ on the kernel command line where also suffixes K, M, G, and T are
+ supported.
+
+endmenu
+
config SCHED_MC
def_bool n
@@ -506,6 +584,7 @@ menuconfig PCI
bool "PCI support"
select HAVE_DMA_ATTRS
select PCI_MSI
+ select IOMMU_SUPPORT
help
Enable PCI support.
diff --git a/kernel/arch/s390/Makefile b/kernel/arch/s390/Makefile
index 667b1bca5..e8d4423e4 100644
--- a/kernel/arch/s390/Makefile
+++ b/kernel/arch/s390/Makefile
@@ -33,6 +33,8 @@ mflags-$(CONFIG_MARCH_Z196) := -march=z196
mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12
mflags-$(CONFIG_MARCH_Z13) := -march=z13
+export CC_FLAGS_MARCH := $(mflags-y)
+
aflags-y += $(mflags-y)
cflags-y += $(mflags-y)
diff --git a/kernel/arch/s390/boot/compressed/Makefile b/kernel/arch/s390/boot/compressed/Makefile
index d4788111c..fac6ac979 100644
--- a/kernel/arch/s390/boot/compressed/Makefile
+++ b/kernel/arch/s390/boot/compressed/Makefile
@@ -10,7 +10,7 @@ targets += misc.o piggy.o sizes.h head.o
KBUILD_CFLAGS := -m64 -D__KERNEL__ $(LINUX_INCLUDE) -O2
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
-KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks
+KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -msoft-float
KBUILD_CFLAGS += $(call cc-option,-mpacked-stack)
KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
diff --git a/kernel/arch/s390/configs/default_defconfig b/kernel/arch/s390/configs/default_defconfig
index 64707750c..ed7da281d 100644
--- a/kernel/arch/s390/configs/default_defconfig
+++ b/kernel/arch/s390/configs/default_defconfig
@@ -13,17 +13,20 @@ CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_RCU_FAST_NO_HZ=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
-CONFIG_RESOURCE_COUNTERS=y
CONFIG_CGROUP_PERF=y
CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_BLK_CGROUP=y
+CONFIG_NAMESPACES=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_BPF_SYSCALL=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_OPROFILE=m
@@ -44,9 +47,11 @@ CONFIG_SOLARIS_X86_PARTITION=y
CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_CFQ_GROUP_IOSCHED=y
CONFIG_DEFAULT_DEADLINE=y
+CONFIG_LIVEPATCH=y
CONFIG_MARCH_Z196=y
CONFIG_TUNE_ZEC12=y
CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
CONFIG_PREEMPT=y
CONFIG_HZ_100=y
CONFIG_MEMORY_HOTPLUG=y
@@ -242,9 +247,9 @@ CONFIG_NF_CONNTRACK_IPV4=m
# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
CONFIG_NF_TABLES_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_NF_TABLES_ARP=m
CONFIG_NF_NAT_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -264,8 +269,8 @@ CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_TABLES_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NF_NAT_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -353,7 +358,6 @@ CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_OSD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_BLK_DEV_XIP=y
CONFIG_CDROM_PKTCDVD=m
CONFIG_ATA_OVER_ETH=m
CONFIG_VIRTIO_BLK=y
@@ -377,7 +381,7 @@ CONFIG_ISCSI_TCP=m
CONFIG_SCSI_DEBUG=m
CONFIG_ZFCP=y
CONFIG_SCSI_VIRTIO=m
-CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH=y
CONFIG_SCSI_DH_RDAC=m
CONFIG_SCSI_DH_HP_SW=m
CONFIG_SCSI_DH_EMC=m
@@ -458,7 +462,6 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT3_FS_POSIX_ACL=y
@@ -544,7 +547,6 @@ CONFIG_FRAME_WARN=1024
CONFIG_READABLE_ASM=y
CONFIG_UNUSED_SYMBOLS=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_DEBUG_OBJECTS=y
CONFIG_DEBUG_OBJECTS_SELFTEST=y
@@ -558,6 +560,7 @@ CONFIG_SLUB_STATS=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_VM=y
CONFIG_DEBUG_VM_RB=y
+CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
CONFIG_DEBUG_PER_CPU_MAPS=y
CONFIG_DEBUG_SHIRQ=y
@@ -575,7 +578,6 @@ CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
CONFIG_DEBUG_CREDENTIALS=y
-CONFIG_PROVE_RCU=y
CONFIG_RCU_TORTURE_TEST=m
CONFIG_RCU_CPU_STALL_TIMEOUT=300
CONFIG_NOTIFIER_ERROR_INJECTION=m
@@ -611,7 +613,6 @@ CONFIG_TEST_BPF=m
# CONFIG_STRICT_DEVMEM is not set
CONFIG_S390_PTDUMP=y
CONFIG_ENCRYPTED_KEYS=m
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
diff --git a/kernel/arch/s390/configs/gcov_defconfig b/kernel/arch/s390/configs/gcov_defconfig
index 5c3097272..9858b14cd 100644
--- a/kernel/arch/s390/configs/gcov_defconfig
+++ b/kernel/arch/s390/configs/gcov_defconfig
@@ -13,15 +13,18 @@ CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_RCU_FAST_NO_HZ=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
-CONFIG_RESOURCE_COUNTERS=y
CONFIG_CGROUP_PERF=y
CONFIG_BLK_CGROUP=y
+CONFIG_NAMESPACES=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_BPF_SYSCALL=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_OPROFILE=m
@@ -47,6 +50,7 @@ CONFIG_DEFAULT_DEADLINE=y
CONFIG_MARCH_Z196=y
CONFIG_TUNE_ZEC12=y
CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
@@ -240,9 +244,9 @@ CONFIG_NF_CONNTRACK_IPV4=m
# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
CONFIG_NF_TABLES_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_NF_TABLES_ARP=m
CONFIG_NF_NAT_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -262,8 +266,8 @@ CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_TABLES_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NF_NAT_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -350,7 +354,6 @@ CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_OSD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_BLK_DEV_XIP=y
CONFIG_CDROM_PKTCDVD=m
CONFIG_ATA_OVER_ETH=m
CONFIG_VIRTIO_BLK=y
@@ -374,7 +377,7 @@ CONFIG_ISCSI_TCP=m
CONFIG_SCSI_DEBUG=m
CONFIG_ZFCP=y
CONFIG_SCSI_VIRTIO=m
-CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH=y
CONFIG_SCSI_DH_RDAC=m
CONFIG_SCSI_DH_HP_SW=m
CONFIG_SCSI_DH_EMC=m
@@ -455,7 +458,6 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT3_FS_POSIX_ACL=y
@@ -538,7 +540,7 @@ CONFIG_DEBUG_INFO=y
CONFIG_FRAME_WARN=1024
CONFIG_UNUSED_SYMBOLS=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
CONFIG_PANIC_ON_OOPS=y
CONFIG_TIMER_STATS=y
@@ -558,7 +560,6 @@ CONFIG_ATOMIC64_SELFTEST=y
# CONFIG_STRICT_DEVMEM is not set
CONFIG_S390_PTDUMP=y
CONFIG_ENCRYPTED_KEYS=m
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
diff --git a/kernel/arch/s390/configs/performance_defconfig b/kernel/arch/s390/configs/performance_defconfig
index bda70f1ff..7f14f8071 100644
--- a/kernel/arch/s390/configs/performance_defconfig
+++ b/kernel/arch/s390/configs/performance_defconfig
@@ -13,15 +13,19 @@ CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_RCU_FAST_NO_HZ=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
+# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
-CONFIG_RESOURCE_COUNTERS=y
CONFIG_CGROUP_PERF=y
CONFIG_BLK_CGROUP=y
+CONFIG_NAMESPACES=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_BPF_SYSCALL=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_OPROFILE=m
@@ -42,9 +46,11 @@ CONFIG_SOLARIS_X86_PARTITION=y
CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_CFQ_GROUP_IOSCHED=y
CONFIG_DEFAULT_DEADLINE=y
+CONFIG_LIVEPATCH=y
CONFIG_MARCH_Z196=y
CONFIG_TUNE_ZEC12=y
-CONFIG_NR_CPUS=256
+CONFIG_NR_CPUS=512
+CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
@@ -238,9 +244,9 @@ CONFIG_NF_CONNTRACK_IPV4=m
# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
CONFIG_NF_TABLES_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_NF_TABLES_ARP=m
CONFIG_NF_NAT_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -260,8 +266,8 @@ CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_TABLES_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NF_NAT_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -348,7 +354,6 @@ CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_OSD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_BLK_DEV_XIP=y
CONFIG_CDROM_PKTCDVD=m
CONFIG_ATA_OVER_ETH=m
CONFIG_VIRTIO_BLK=y
@@ -372,7 +377,7 @@ CONFIG_ISCSI_TCP=m
CONFIG_SCSI_DEBUG=m
CONFIG_ZFCP=y
CONFIG_SCSI_VIRTIO=m
-CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH=y
CONFIG_SCSI_DH_RDAC=m
CONFIG_SCSI_DH_HP_SW=m
CONFIG_SCSI_DH_EMC=m
@@ -453,7 +458,6 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT3_FS_POSIX_ACL=y
@@ -536,7 +540,7 @@ CONFIG_DEBUG_INFO=y
CONFIG_FRAME_WARN=1024
CONFIG_UNUSED_SYMBOLS=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_TIMER_STATS=y
CONFIG_RCU_TORTURE_TEST=m
@@ -553,7 +557,6 @@ CONFIG_ATOMIC64_SELFTEST=y
# CONFIG_STRICT_DEVMEM is not set
CONFIG_S390_PTDUMP=y
CONFIG_ENCRYPTED_KEYS=m
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
diff --git a/kernel/arch/s390/configs/zfcpdump_defconfig b/kernel/arch/s390/configs/zfcpdump_defconfig
index 1b0184a0f..92805d604 100644
--- a/kernel/arch/s390/configs/zfcpdump_defconfig
+++ b/kernel/arch/s390/configs/zfcpdump_defconfig
@@ -1,7 +1,6 @@
# CONFIG_SWAP is not set
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
-CONFIG_RCU_FAST_NO_HZ=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
# CONFIG_COMPAT_BRK is not set
@@ -54,10 +53,6 @@ CONFIG_RAW_DRIVER=y
# CONFIG_MONWRITER is not set
# CONFIG_S390_VMUR is not set
# CONFIG_HID is not set
-CONFIG_MEMSTICK=y
-CONFIG_MEMSTICK_DEBUG=y
-CONFIG_MEMSTICK_UNSAFE_RESUME=y
-CONFIG_MSPRO_BLOCK=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
diff --git a/kernel/arch/s390/crypto/aes_s390.c b/kernel/arch/s390/crypto/aes_s390.c
index 5566ce80a..0b9b95f3c 100644
--- a/kernel/arch/s390/crypto/aes_s390.c
+++ b/kernel/arch/s390/crypto/aes_s390.c
@@ -24,6 +24,7 @@
#include <crypto/algapi.h>
#include <linux/err.h>
#include <linux/module.h>
+#include <linux/cpufeature.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include "crypt_s390.h"
@@ -976,7 +977,7 @@ static void __exit aes_s390_fini(void)
crypto_unregister_alg(&aes_alg);
}
-module_init(aes_s390_init);
+module_cpu_feature_match(MSA, aes_s390_init);
module_exit(aes_s390_fini);
MODULE_ALIAS_CRYPTO("aes-all");
diff --git a/kernel/arch/s390/crypto/des_s390.c b/kernel/arch/s390/crypto/des_s390.c
index 9e05cc453..fba1c10a2 100644
--- a/kernel/arch/s390/crypto/des_s390.c
+++ b/kernel/arch/s390/crypto/des_s390.c
@@ -16,6 +16,7 @@
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <crypto/algapi.h>
#include <crypto/des.h>
@@ -616,7 +617,7 @@ static void __exit des_s390_exit(void)
crypto_unregister_alg(&des_alg);
}
-module_init(des_s390_init);
+module_cpu_feature_match(MSA, des_s390_init);
module_exit(des_s390_exit);
MODULE_ALIAS_CRYPTO("des");
diff --git a/kernel/arch/s390/crypto/ghash_s390.c b/kernel/arch/s390/crypto/ghash_s390.c
index b258110da..26e14efd3 100644
--- a/kernel/arch/s390/crypto/ghash_s390.c
+++ b/kernel/arch/s390/crypto/ghash_s390.c
@@ -9,6 +9,7 @@
#include <crypto/internal/hash.h>
#include <linux/module.h>
+#include <linux/cpufeature.h>
#include "crypt_s390.h"
@@ -158,7 +159,7 @@ static void __exit ghash_mod_exit(void)
crypto_unregister_shash(&ghash_alg);
}
-module_init(ghash_mod_init);
+module_cpu_feature_match(MSA, ghash_mod_init);
module_exit(ghash_mod_exit);
MODULE_ALIAS_CRYPTO("ghash");
diff --git a/kernel/arch/s390/crypto/prng.c b/kernel/arch/s390/crypto/prng.c
index 9d5192c94..b8045b97f 100644
--- a/kernel/arch/s390/crypto/prng.c
+++ b/kernel/arch/s390/crypto/prng.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/mutex.h>
+#include <linux/cpufeature.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <asm/debug.h>
@@ -914,6 +915,5 @@ static void __exit prng_exit(void)
}
}
-
-module_init(prng_init);
+module_cpu_feature_match(MSA, prng_init);
module_exit(prng_exit);
diff --git a/kernel/arch/s390/crypto/sha.h b/kernel/arch/s390/crypto/sha.h
index f4e9dc716..10f200790 100644
--- a/kernel/arch/s390/crypto/sha.h
+++ b/kernel/arch/s390/crypto/sha.h
@@ -19,7 +19,7 @@
#include <crypto/sha.h>
/* must be big enough for the largest SHA variant */
-#define SHA_MAX_STATE_SIZE 16
+#define SHA_MAX_STATE_SIZE (SHA512_DIGEST_SIZE / 4)
#define SHA_MAX_BLOCK_SIZE SHA512_BLOCK_SIZE
struct s390_sha_ctx {
diff --git a/kernel/arch/s390/crypto/sha1_s390.c b/kernel/arch/s390/crypto/sha1_s390.c
index 5b2bee323..9208eadae 100644
--- a/kernel/arch/s390/crypto/sha1_s390.c
+++ b/kernel/arch/s390/crypto/sha1_s390.c
@@ -26,6 +26,7 @@
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/cpufeature.h>
#include <crypto/sha.h>
#include "crypt_s390.h"
@@ -100,7 +101,7 @@ static void __exit sha1_s390_fini(void)
crypto_unregister_shash(&alg);
}
-module_init(sha1_s390_init);
+module_cpu_feature_match(MSA, sha1_s390_init);
module_exit(sha1_s390_fini);
MODULE_ALIAS_CRYPTO("sha1");
diff --git a/kernel/arch/s390/crypto/sha256_s390.c b/kernel/arch/s390/crypto/sha256_s390.c
index b74ff1581..667888f5c 100644
--- a/kernel/arch/s390/crypto/sha256_s390.c
+++ b/kernel/arch/s390/crypto/sha256_s390.c
@@ -16,6 +16,7 @@
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/cpufeature.h>
#include <crypto/sha.h>
#include "crypt_s390.h"
@@ -140,7 +141,7 @@ static void __exit sha256_s390_fini(void)
crypto_unregister_shash(&sha256_alg);
}
-module_init(sha256_s390_init);
+module_cpu_feature_match(MSA, sha256_s390_init);
module_exit(sha256_s390_fini);
MODULE_ALIAS_CRYPTO("sha256");
diff --git a/kernel/arch/s390/crypto/sha512_s390.c b/kernel/arch/s390/crypto/sha512_s390.c
index 0c36989ba..2ba66b151 100644
--- a/kernel/arch/s390/crypto/sha512_s390.c
+++ b/kernel/arch/s390/crypto/sha512_s390.c
@@ -18,6 +18,7 @@
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/cpufeature.h>
#include "sha.h"
#include "crypt_s390.h"
@@ -148,7 +149,7 @@ static void __exit fini(void)
crypto_unregister_shash(&sha384_alg);
}
-module_init(init);
+module_cpu_feature_match(MSA, init);
module_exit(fini);
MODULE_LICENSE("GPL");
diff --git a/kernel/arch/s390/defconfig b/kernel/arch/s390/defconfig
index 83ef702d2..9256b48e7 100644
--- a/kernel/arch/s390/defconfig
+++ b/kernel/arch/s390/defconfig
@@ -8,7 +8,6 @@ CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_RCU_FAST_NO_HZ=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_CGROUPS=y
@@ -31,9 +30,11 @@ CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
+CONFIG_BLK_DEV_INTEGRITY=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
CONFIG_DEFAULT_DEADLINE=y
+CONFIG_LIVEPATCH=y
CONFIG_MARCH_Z196=y
CONFIG_NR_CPUS=256
CONFIG_HZ_100=y
@@ -41,7 +42,6 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_CMA=y
CONFIG_CRASH_DUMP=y
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
@@ -125,6 +125,7 @@ CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_DETECT_HUNG_TASK=y
+CONFIG_PANIC_ON_OOPS=y
CONFIG_TIMER_STATS=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_PROVE_LOCKING=y
@@ -135,12 +136,16 @@ CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_PI_LIST=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
-CONFIG_PROVE_RCU=y
CONFIG_RCU_CPU_STALL_TIMEOUT=60
+# CONFIG_RCU_CPU_STALL_INFO is not set
CONFIG_RCU_TRACE=y
CONFIG_LATENCYTOP=y
CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
+CONFIG_TRACER_SNAPSHOT=y
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
+CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_UPROBE_EVENT=y
CONFIG_KPROBES_SANITY_TEST=y
# CONFIG_STRICT_DEVMEM is not set
CONFIG_S390_PTDUMP=y
@@ -187,6 +192,7 @@ CONFIG_CRYPTO_ZLIB=m
CONFIG_CRYPTO_LZO=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
CONFIG_ZCRYPT=m
CONFIG_CRYPTO_SHA1_S390=m
CONFIG_CRYPTO_SHA256_S390=m
diff --git a/kernel/arch/s390/hypfs/hypfs_diag.c b/kernel/arch/s390/hypfs/hypfs_diag.c
index 5eeffeefa..045035796 100644
--- a/kernel/arch/s390/hypfs/hypfs_diag.c
+++ b/kernel/arch/s390/hypfs/hypfs_diag.c
@@ -15,6 +15,7 @@
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
+#include <asm/diag.h>
#include <asm/ebcdic.h>
#include "hypfs.h"
@@ -336,7 +337,7 @@ static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
/* Diagnose 204 functions */
-static int diag204(unsigned long subcode, unsigned long size, void *addr)
+static inline int __diag204(unsigned long subcode, unsigned long size, void *addr)
{
register unsigned long _subcode asm("0") = subcode;
register unsigned long _size asm("1") = size;
@@ -351,6 +352,12 @@ static int diag204(unsigned long subcode, unsigned long size, void *addr)
return _size;
}
+static int diag204(unsigned long subcode, unsigned long size, void *addr)
+{
+ diag_stat_inc(DIAG_STAT_X204);
+ return __diag204(subcode, size, addr);
+}
+
/*
* For the old diag subcode 4 with simple data format we have to use real
* memory. If we use subcode 6 or 7 with extended data format, we can (and
@@ -505,6 +512,7 @@ static int diag224(void *ptr)
{
int rc = -EOPNOTSUPP;
+ diag_stat_inc(DIAG_STAT_X224);
asm volatile(
" diag %1,%2,0x224\n"
"0: lhi %0,0x0\n"
diff --git a/kernel/arch/s390/hypfs/hypfs_diag0c.c b/kernel/arch/s390/hypfs/hypfs_diag0c.c
index 24c747a0f..0f1927cbb 100644
--- a/kernel/arch/s390/hypfs/hypfs_diag0c.c
+++ b/kernel/arch/s390/hypfs/hypfs_diag0c.c
@@ -8,6 +8,7 @@
#include <linux/slab.h>
#include <linux/cpu.h>
+#include <asm/diag.h>
#include <asm/hypfs.h>
#include "hypfs.h"
@@ -18,6 +19,7 @@
*/
static void diag0c(struct hypfs_diag0c_entry *entry)
{
+ diag_stat_inc(DIAG_STAT_X00C);
asm volatile (
" sam31\n"
" diag %0,%0,0x0c\n"
diff --git a/kernel/arch/s390/hypfs/hypfs_sprp.c b/kernel/arch/s390/hypfs/hypfs_sprp.c
index f043c3c7e..c9e5c72f7 100644
--- a/kernel/arch/s390/hypfs/hypfs_sprp.c
+++ b/kernel/arch/s390/hypfs/hypfs_sprp.c
@@ -13,6 +13,7 @@
#include <linux/types.h>
#include <linux/uaccess.h>
#include <asm/compat.h>
+#include <asm/diag.h>
#include <asm/sclp.h>
#include "hypfs.h"
@@ -22,7 +23,7 @@
#define DIAG304_CMD_MAX 2
-static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd)
+static inline unsigned long __hypfs_sprp_diag304(void *data, unsigned long cmd)
{
register unsigned long _data asm("2") = (unsigned long) data;
register unsigned long _rc asm("3");
@@ -34,6 +35,12 @@ static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd)
return _rc;
}
+static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd)
+{
+ diag_stat_inc(DIAG_STAT_X304);
+ return __hypfs_sprp_diag304(data, cmd);
+}
+
static void hypfs_sprp_free(const void *data)
{
free_page((unsigned long) data);
@@ -128,14 +135,14 @@ static struct hypfs_dbfs_file hypfs_sprp_file = {
int hypfs_sprp_init(void)
{
- if (!sclp_has_sprp())
+ if (!sclp.has_sprp)
return 0;
return hypfs_dbfs_create_file(&hypfs_sprp_file);
}
void hypfs_sprp_exit(void)
{
- if (!sclp_has_sprp())
+ if (!sclp.has_sprp)
return;
hypfs_dbfs_remove_file(&hypfs_sprp_file);
}
diff --git a/kernel/arch/s390/hypfs/hypfs_vm.c b/kernel/arch/s390/hypfs/hypfs_vm.c
index afbe07907..44feac38c 100644
--- a/kernel/arch/s390/hypfs/hypfs_vm.c
+++ b/kernel/arch/s390/hypfs/hypfs_vm.c
@@ -9,6 +9,7 @@
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
+#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/timex.h>
#include "hypfs.h"
@@ -66,6 +67,7 @@ static int diag2fc(int size, char* query, void *addr)
memset(parm_list.aci_grp, 0x40, NAME_LEN);
rc = -1;
+ diag_stat_inc(DIAG_STAT_X2FC);
asm volatile(
" diag %0,%1,0x2fc\n"
"0:\n"
diff --git a/kernel/arch/s390/hypfs/inode.c b/kernel/arch/s390/hypfs/inode.c
index 2eeb0a0f5..b2e5902bd 100644
--- a/kernel/arch/s390/hypfs/inode.c
+++ b/kernel/arch/s390/hypfs/inode.c
@@ -62,18 +62,13 @@ static void hypfs_add_dentry(struct dentry *dentry)
hypfs_last_dentry = dentry;
}
-static inline int hypfs_positive(struct dentry *dentry)
-{
- return d_really_is_positive(dentry) && !d_unhashed(dentry);
-}
-
static void hypfs_remove(struct dentry *dentry)
{
struct dentry *parent;
parent = dentry->d_parent;
mutex_lock(&d_inode(parent)->i_mutex);
- if (hypfs_positive(dentry)) {
+ if (simple_positive(dentry)) {
if (d_is_dir(dentry))
simple_rmdir(d_inode(parent), dentry);
else
diff --git a/kernel/arch/s390/include/asm/Kbuild b/kernel/arch/s390/include/asm/Kbuild
index c631f98fd..9043d2e1e 100644
--- a/kernel/arch/s390/include/asm/Kbuild
+++ b/kernel/arch/s390/include/asm/Kbuild
@@ -3,6 +3,7 @@
generic-y += clkdev.h
generic-y += irq_work.h
generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
generic-y += preempt.h
-generic-y += scatterlist.h
generic-y += trace_clock.h
+generic-y += word-at-a-time.h
diff --git a/kernel/arch/s390/include/asm/appldata.h b/kernel/arch/s390/include/asm/appldata.h
index 16887c5fd..a6263d4e8 100644
--- a/kernel/arch/s390/include/asm/appldata.h
+++ b/kernel/arch/s390/include/asm/appldata.h
@@ -7,6 +7,7 @@
#ifndef _ASM_S390_APPLDATA_H
#define _ASM_S390_APPLDATA_H
+#include <asm/diag.h>
#include <asm/io.h>
#define APPLDATA_START_INTERVAL_REC 0x80
@@ -53,6 +54,7 @@ static inline int appldata_asm(struct appldata_product_id *id,
parm_list.buffer_length = length;
parm_list.product_id_addr = (unsigned long) id;
parm_list.buffer_addr = virt_to_phys(buffer);
+ diag_stat_inc(DIAG_STAT_X0DC);
asm volatile(
" diag %1,%0,0xdc"
: "=d" (ry)
diff --git a/kernel/arch/s390/include/asm/atomic.h b/kernel/arch/s390/include/asm/atomic.h
index adbe3802e..911064aa5 100644
--- a/kernel/arch/s390/include/asm/atomic.h
+++ b/kernel/arch/s390/include/asm/atomic.h
@@ -27,6 +27,7 @@
#define __ATOMIC_OR "lao"
#define __ATOMIC_AND "lan"
#define __ATOMIC_ADD "laa"
+#define __ATOMIC_XOR "lax"
#define __ATOMIC_BARRIER "bcr 14,0\n"
#define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier) \
@@ -35,7 +36,6 @@
\
typecheck(atomic_t *, ptr); \
asm volatile( \
- __barrier \
op_string " %0,%2,%1\n" \
__barrier \
: "=d" (old_val), "+Q" ((ptr)->counter) \
@@ -49,6 +49,7 @@
#define __ATOMIC_OR "or"
#define __ATOMIC_AND "nr"
#define __ATOMIC_ADD "ar"
+#define __ATOMIC_XOR "xr"
#define __ATOMIC_BARRIER "\n"
#define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier) \
@@ -118,15 +119,17 @@ static inline void atomic_add(int i, atomic_t *v)
#define atomic_dec_return(_v) atomic_sub_return(1, _v)
#define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0)
-static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
- __ATOMIC_LOOP(v, ~mask, __ATOMIC_AND, __ATOMIC_NO_BARRIER);
+#define ATOMIC_OP(op, OP) \
+static inline void atomic_##op(int i, atomic_t *v) \
+{ \
+ __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_NO_BARRIER); \
}
-static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
- __ATOMIC_LOOP(v, mask, __ATOMIC_OR, __ATOMIC_NO_BARRIER);
-}
+ATOMIC_OP(and, AND)
+ATOMIC_OP(or, OR)
+ATOMIC_OP(xor, XOR)
+
+#undef ATOMIC_OP
#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
@@ -167,6 +170,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
#define __ATOMIC64_OR "laog"
#define __ATOMIC64_AND "lang"
#define __ATOMIC64_ADD "laag"
+#define __ATOMIC64_XOR "laxg"
#define __ATOMIC64_BARRIER "bcr 14,0\n"
#define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier) \
@@ -175,7 +179,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
\
typecheck(atomic64_t *, ptr); \
asm volatile( \
- __barrier \
op_string " %0,%2,%1\n" \
__barrier \
: "=d" (old_val), "+Q" ((ptr)->counter) \
@@ -189,6 +192,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
#define __ATOMIC64_OR "ogr"
#define __ATOMIC64_AND "ngr"
#define __ATOMIC64_ADD "agr"
+#define __ATOMIC64_XOR "xgr"
#define __ATOMIC64_BARRIER "\n"
#define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier) \
@@ -247,16 +251,6 @@ static inline void atomic64_add(long long i, atomic64_t *v)
__ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_NO_BARRIER);
}
-static inline void atomic64_clear_mask(unsigned long mask, atomic64_t *v)
-{
- __ATOMIC64_LOOP(v, ~mask, __ATOMIC64_AND, __ATOMIC64_NO_BARRIER);
-}
-
-static inline void atomic64_set_mask(unsigned long mask, atomic64_t *v)
-{
- __ATOMIC64_LOOP(v, mask, __ATOMIC64_OR, __ATOMIC64_NO_BARRIER);
-}
-
#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
static inline long long atomic64_cmpxchg(atomic64_t *v,
@@ -270,6 +264,17 @@ static inline long long atomic64_cmpxchg(atomic64_t *v,
return old;
}
+#define ATOMIC64_OP(op, OP) \
+static inline void atomic64_##op(long i, atomic64_t *v) \
+{ \
+ __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_NO_BARRIER); \
+}
+
+ATOMIC64_OP(and, AND)
+ATOMIC64_OP(or, OR)
+ATOMIC64_OP(xor, XOR)
+
+#undef ATOMIC64_OP
#undef __ATOMIC64_LOOP
static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u)
diff --git a/kernel/arch/s390/include/asm/barrier.h b/kernel/arch/s390/include/asm/barrier.h
index 8d724718e..d68e11e0d 100644
--- a/kernel/arch/s390/include/asm/barrier.h
+++ b/kernel/arch/s390/include/asm/barrier.h
@@ -22,10 +22,10 @@
#define mb() do { asm volatile(__ASM_BARRIER : : : "memory"); } while (0)
-#define rmb() mb()
-#define wmb() mb()
-#define dma_rmb() rmb()
-#define dma_wmb() wmb()
+#define rmb() barrier()
+#define wmb() barrier()
+#define dma_rmb() mb()
+#define dma_wmb() mb()
#define smp_mb() mb()
#define smp_rmb() rmb()
#define smp_wmb() wmb()
@@ -36,18 +36,18 @@
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
-#define set_mb(var, value) do { var = value; mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
barrier(); \
- ACCESS_ONCE(*p) = (v); \
+ WRITE_ONCE(*p, v); \
} while (0)
#define smp_load_acquire(p) \
({ \
- typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+ typeof(*p) ___p1 = READ_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
barrier(); \
___p1; \
diff --git a/kernel/arch/s390/include/asm/bitops.h b/kernel/arch/s390/include/asm/bitops.h
index 9b68e98a7..8043f10da 100644
--- a/kernel/arch/s390/include/asm/bitops.h
+++ b/kernel/arch/s390/include/asm/bitops.h
@@ -11,30 +11,25 @@
* big-endian system because, unlike little endian, the number of each
* bit depends on the word size.
*
- * The bitop functions are defined to work on unsigned longs, so for an
- * s390x system the bits end up numbered:
+ * The bitop functions are defined to work on unsigned longs, so the bits
+ * end up numbered:
* |63..............0|127............64|191...........128|255...........192|
- * and on s390:
- * |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224|
*
* There are a few little-endian macros used mostly for filesystem
- * bitmaps, these work on similar bit arrays layouts, but
- * byte-oriented:
+ * bitmaps, these work on similar bit array layouts, but byte-oriented:
* |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56|
*
- * The main difference is that bit 3-5 (64b) or 3-4 (32b) in the bit
- * number field needs to be reversed compared to the big-endian bit
- * fields. This can be achieved by XOR with 0x38 (64b) or 0x18 (32b).
+ * The main difference is that bit 3-5 in the bit number field needs to be
+ * reversed compared to the big-endian bit fields. This can be achieved by
+ * XOR with 0x38.
*
- * We also have special functions which work with an MSB0 encoding:
- * on an s390x system the bits are numbered:
+ * We also have special functions which work with an MSB0 encoding.
+ * The bits are numbered:
* |0..............63|64............127|128...........191|192...........255|
- * and on s390:
- * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
*
- * The main difference is that bit 0-63 (64b) or 0-31 (32b) in the bit
- * number field needs to be reversed compared to the LSB0 encoded bit
- * fields. This can be achieved by XOR with 0x3f (64b) or 0x1f (32b).
+ * The main difference is that bit 0-63 in the bit number field needs to be
+ * reversed compared to the LSB0 encoded bit fields. This can be achieved by
+ * XOR with 0x3f.
*
*/
@@ -64,7 +59,6 @@
\
typecheck(unsigned long *, (__addr)); \
asm volatile( \
- __barrier \
__op_string " %0,%2,%1\n" \
__barrier \
: "=d" (__old), "+Q" (*(__addr)) \
@@ -276,12 +270,32 @@ static inline int test_bit(unsigned long nr, const volatile unsigned long *ptr)
return (*addr >> (nr & 7)) & 1;
}
+static inline int test_and_set_bit_lock(unsigned long nr,
+ volatile unsigned long *ptr)
+{
+ if (test_bit(nr, ptr))
+ return 1;
+ return test_and_set_bit(nr, ptr);
+}
+
+static inline void clear_bit_unlock(unsigned long nr,
+ volatile unsigned long *ptr)
+{
+ smp_mb__before_atomic();
+ clear_bit(nr, ptr);
+}
+
+static inline void __clear_bit_unlock(unsigned long nr,
+ volatile unsigned long *ptr)
+{
+ smp_mb();
+ __clear_bit(nr, ptr);
+}
+
/*
* Functions which use MSB0 bit numbering.
- * On an s390x system the bits are numbered:
+ * The bits are numbered:
* |0..............63|64............127|128...........191|192...........255|
- * and on s390:
- * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
*/
unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size);
unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size,
@@ -446,7 +460,6 @@ static inline int fls(int word)
#include <asm-generic/bitops/ffz.h>
#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/hweight.h>
-#include <asm-generic/bitops/lock.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/kernel/arch/s390/include/asm/cio.h b/kernel/arch/s390/include/asm/cio.h
index 096339207..d1e7b0a0f 100644
--- a/kernel/arch/s390/include/asm/cio.h
+++ b/kernel/arch/s390/include/asm/cio.h
@@ -5,6 +5,7 @@
#define _ASM_S390_CIO_H_
#include <linux/spinlock.h>
+#include <linux/bitops.h>
#include <asm/types.h>
#define LPM_ANYPATH 0xff
@@ -296,12 +297,22 @@ static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1,
return 0;
}
+/**
+ * pathmask_to_pos() - find the position of the left-most bit in a pathmask
+ * @mask: pathmask with at least one bit set
+ */
+static inline u8 pathmask_to_pos(u8 mask)
+{
+ return 8 - ffs(mask);
+}
+
void channel_subsystem_reinit(void);
extern void css_schedule_reprobe(void);
extern void reipl_ccw_dev(struct ccw_dev_id *id);
struct cio_iplinfo {
+ u8 ssid;
u16 devno;
int is_qdio;
};
diff --git a/kernel/arch/s390/include/asm/cmb.h b/kernel/arch/s390/include/asm/cmb.h
index 806eac12e..ed2630c23 100644
--- a/kernel/arch/s390/include/asm/cmb.h
+++ b/kernel/arch/s390/include/asm/cmb.h
@@ -6,6 +6,7 @@
struct ccw_device;
extern int enable_cmf(struct ccw_device *cdev);
extern int disable_cmf(struct ccw_device *cdev);
+extern int __disable_cmf(struct ccw_device *cdev);
extern u64 cmf_read(struct ccw_device *cdev, int index);
extern int cmf_readall(struct ccw_device *cdev, struct cmbdata *data);
diff --git a/kernel/arch/s390/include/asm/cmpxchg.h b/kernel/arch/s390/include/asm/cmpxchg.h
index 4eadec466..24ea6948e 100644
--- a/kernel/arch/s390/include/asm/cmpxchg.h
+++ b/kernel/arch/s390/include/asm/cmpxchg.h
@@ -32,9 +32,7 @@
__old; \
})
-#define __HAVE_ARCH_CMPXCHG
-
-#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \
+#define __cmpxchg_double(p1, p2, o1, o2, n1, n2) \
({ \
register __typeof__(*(p1)) __old1 asm("2") = (o1); \
register __typeof__(*(p2)) __old2 asm("3") = (o2); \
@@ -42,7 +40,7 @@
register __typeof__(*(p2)) __new2 asm("5") = (n2); \
int cc; \
asm volatile( \
- insn " %[old],%[new],%[ptr]\n" \
+ " cdsg %[old],%[new],%[ptr]\n" \
" ipm %[cc]\n" \
" srl %[cc],28" \
: [cc] "=d" (cc), [old] "+d" (__old1), "+d" (__old2) \
@@ -52,30 +50,6 @@
!cc; \
})
-#define __cmpxchg_double_4(p1, p2, o1, o2, n1, n2) \
- __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cds")
-
-#define __cmpxchg_double_8(p1, p2, o1, o2, n1, n2) \
- __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cdsg")
-
-extern void __cmpxchg_double_called_with_bad_pointer(void);
-
-#define __cmpxchg_double(p1, p2, o1, o2, n1, n2) \
-({ \
- int __ret; \
- switch (sizeof(*(p1))) { \
- case 4: \
- __ret = __cmpxchg_double_4(p1, p2, o1, o2, n1, n2); \
- break; \
- case 8: \
- __ret = __cmpxchg_double_8(p1, p2, o1, o2, n1, n2); \
- break; \
- default: \
- __cmpxchg_double_called_with_bad_pointer(); \
- } \
- __ret; \
-})
-
#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \
({ \
__typeof__(p1) __p1 = (p1); \
@@ -83,7 +57,7 @@ extern void __cmpxchg_double_called_with_bad_pointer(void);
BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \
BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \
VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\
- __cmpxchg_double_8(__p1, __p2, o1, o2, n1, n2); \
+ __cmpxchg_double(__p1, __p2, o1, o2, n1, n2); \
})
#define system_has_cmpxchg_double() 1
diff --git a/kernel/arch/s390/include/asm/cpu.h b/kernel/arch/s390/include/asm/cpu.h
index f5a8e2fcd..915410003 100644
--- a/kernel/arch/s390/include/asm/cpu.h
+++ b/kernel/arch/s390/include/asm/cpu.h
@@ -8,8 +8,6 @@
#ifndef _ASM_S390_CPU_H
#define _ASM_S390_CPU_H
-#define MAX_CPU_ADDRESS 255
-
#ifndef __ASSEMBLY__
#include <linux/types.h>
diff --git a/kernel/arch/s390/include/asm/cpu_mf.h b/kernel/arch/s390/include/asm/cpu_mf.h
index 5243a8679..9dd04b9e9 100644
--- a/kernel/arch/s390/include/asm/cpu_mf.h
+++ b/kernel/arch/s390/include/asm/cpu_mf.h
@@ -22,15 +22,10 @@
#define CPU_MF_INT_SF_LSDA (1 << 22) /* loss of sample data alert */
#define CPU_MF_INT_CF_CACA (1 << 7) /* counter auth. change alert */
#define CPU_MF_INT_CF_LCDA (1 << 6) /* loss of counter data alert */
-#define CPU_MF_INT_RI_HALTED (1 << 5) /* run-time instr. halted */
-#define CPU_MF_INT_RI_BUF_FULL (1 << 4) /* run-time instr. program
- buffer full */
-
#define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA)
#define CPU_MF_INT_SF_MASK (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE| \
CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \
CPU_MF_INT_SF_LSDA)
-#define CPU_MF_INT_RI_MASK (CPU_MF_INT_RI_HALTED|CPU_MF_INT_RI_BUF_FULL)
/* CPU measurement facility support */
static inline int cpum_cf_avail(void)
diff --git a/kernel/arch/s390/include/asm/cpufeature.h b/kernel/arch/s390/include/asm/cpufeature.h
new file mode 100644
index 000000000..fa7e69b7c
--- /dev/null
+++ b/kernel/arch/s390/include/asm/cpufeature.h
@@ -0,0 +1,29 @@
+/*
+ * Module interface for CPU features
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_S390_CPUFEATURE_H
+#define __ASM_S390_CPUFEATURE_H
+
+#include <asm/elf.h>
+
+/* Hardware features on Linux on z Systems are indicated by facility bits that
+ * are mapped to the so-called machine flags. Particular machine flags are
+ * then used to define ELF hardware capabilities; most notably hardware flags
+ * that are essential for user space / glibc.
+ *
+ * Restrict the set of exposed CPU features to ELF hardware capabilities for
+ * now. Additional machine flags can be indicated by values larger than
+ * MAX_ELF_HWCAP_FEATURES.
+ */
+#define MAX_ELF_HWCAP_FEATURES (8 * sizeof(elf_hwcap))
+#define MAX_CPU_FEATURES MAX_ELF_HWCAP_FEATURES
+
+#define cpu_feature(feat) ilog2(HWCAP_S390_ ## feat)
+
+int cpu_have_feature(unsigned int nr);
+
+#endif /* __ASM_S390_CPUFEATURE_H */
diff --git a/kernel/arch/s390/include/asm/diag.h b/kernel/arch/s390/include/asm/diag.h
index 7e91c5807..5fac921c1 100644
--- a/kernel/arch/s390/include/asm/diag.h
+++ b/kernel/arch/s390/include/asm/diag.h
@@ -8,6 +8,34 @@
#ifndef _ASM_S390_DIAG_H
#define _ASM_S390_DIAG_H
+#include <linux/percpu.h>
+
+enum diag_stat_enum {
+ DIAG_STAT_X008,
+ DIAG_STAT_X00C,
+ DIAG_STAT_X010,
+ DIAG_STAT_X014,
+ DIAG_STAT_X044,
+ DIAG_STAT_X064,
+ DIAG_STAT_X09C,
+ DIAG_STAT_X0DC,
+ DIAG_STAT_X204,
+ DIAG_STAT_X210,
+ DIAG_STAT_X224,
+ DIAG_STAT_X250,
+ DIAG_STAT_X258,
+ DIAG_STAT_X288,
+ DIAG_STAT_X2C4,
+ DIAG_STAT_X2FC,
+ DIAG_STAT_X304,
+ DIAG_STAT_X308,
+ DIAG_STAT_X500,
+ NR_DIAG_STAT
+};
+
+void diag_stat_inc(enum diag_stat_enum nr);
+void diag_stat_inc_norecursion(enum diag_stat_enum nr);
+
/*
* Diagnose 10: Release page range
*/
@@ -18,6 +46,7 @@ static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn)
start_addr = start_pfn << PAGE_SHIFT;
end_addr = (start_pfn + num_pfn - 1) << PAGE_SHIFT;
+ diag_stat_inc(DIAG_STAT_X010);
asm volatile(
"0: diag %0,%1,0x10\n"
"1:\n"
diff --git a/kernel/arch/s390/include/asm/dma-mapping.h b/kernel/arch/s390/include/asm/dma-mapping.h
index 9d395961e..b3fd54d93 100644
--- a/kernel/arch/s390/include/asm/dma-mapping.h
+++ b/kernel/arch/s390/include/asm/dma-mapping.h
@@ -18,27 +18,13 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
return &s390_dma_ops;
}
-extern int dma_set_mask(struct device *dev, u64 mask);
-
static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction direction)
{
}
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
#include <asm-generic/dma-mapping-common.h>
-static inline int dma_supported(struct device *dev, u64 mask)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- if (dma_ops->dma_supported == NULL)
- return 1;
- return dma_ops->dma_supported(dev, mask);
-}
-
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
if (!dev->dma_mask)
@@ -46,45 +32,4 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
return addr + size - 1 <= *dev->dma_mask;
}
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- debug_dma_mapping_error(dev, dma_addr);
- if (dma_ops->mapping_error)
- return dma_ops->mapping_error(dev, dma_addr);
- return dma_addr == DMA_ERROR_CODE;
-}
-
-#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- void *cpu_addr;
-
- BUG_ON(!ops);
-
- cpu_addr = ops->alloc(dev, size, dma_handle, flags, attrs);
- debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
-
- return cpu_addr;
-}
-
-#define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL)
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- BUG_ON(!ops);
-
- debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
- ops->free(dev, size, cpu_addr, dma_handle, attrs);
-}
-
#endif /* _ASM_S390_DMA_MAPPING_H */
diff --git a/kernel/arch/s390/include/asm/elf.h b/kernel/arch/s390/include/asm/elf.h
index 3ad48f22d..bab6739a1 100644
--- a/kernel/arch/s390/include/asm/elf.h
+++ b/kernel/arch/s390/include/asm/elf.h
@@ -206,9 +206,16 @@ do { \
} while (0)
#endif /* CONFIG_COMPAT */
-extern unsigned long mmap_rnd_mask;
-
-#define STACK_RND_MASK (test_thread_flag(TIF_31BIT) ? 0x7ff : mmap_rnd_mask)
+/*
+ * Cache aliasing on the latest machines calls for a mapping granularity
+ * of 512KB. For 64-bit processes use a 512KB alignment and a randomization
+ * of up to 1GB. For 31-bit processes the virtual address space is limited,
+ * use no alignment and limit the randomization to 8MB.
+ */
+#define BRK_RND_MASK (is_32bit_task() ? 0x7ffUL : 0x3ffffUL)
+#define MMAP_RND_MASK (is_32bit_task() ? 0x7ffUL : 0x3ff80UL)
+#define MMAP_ALIGN_MASK (is_32bit_task() ? 0 : 0x7fUL)
+#define STACK_RND_MASK MMAP_RND_MASK
#define ARCH_DLINFO \
do { \
diff --git a/kernel/arch/s390/include/asm/etr.h b/kernel/arch/s390/include/asm/etr.h
index 629b79a93..105f90e63 100644
--- a/kernel/arch/s390/include/asm/etr.h
+++ b/kernel/arch/s390/include/asm/etr.h
@@ -211,8 +211,12 @@ static inline int etr_ptff(void *ptff_block, unsigned int func)
#define ETR_PTFF_SGS 0x43 /* set gross steering rate */
/* Functions needed by the machine check handler */
-void etr_switch_to_local(void);
-void etr_sync_check(void);
+int etr_switch_to_local(void);
+int etr_sync_check(void);
+void etr_queue_work(void);
+
+/* notifier for syncs */
+extern struct atomic_notifier_head s390_epoch_delta_notifier;
/* STP interruption parameter */
struct stp_irq_parm {
@@ -250,7 +254,8 @@ struct stp_sstpi {
} __attribute__ ((packed));
/* Functions needed by the machine check handler */
-void stp_sync_check(void);
-void stp_island_check(void);
+int stp_sync_check(void);
+int stp_island_check(void);
+void stp_queue_work(void);
#endif /* __S390_ETR_H */
diff --git a/kernel/arch/s390/include/asm/fpu/api.h b/kernel/arch/s390/include/asm/fpu/api.h
new file mode 100644
index 000000000..5e04f3cbd
--- /dev/null
+++ b/kernel/arch/s390/include/asm/fpu/api.h
@@ -0,0 +1,30 @@
+/*
+ * In-kernel FPU support functions
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_API_H
+#define _ASM_S390_FPU_API_H
+
+void save_fpu_regs(void);
+
+static inline int test_fp_ctl(u32 fpc)
+{
+ u32 orig_fpc;
+ int rc;
+
+ asm volatile(
+ " efpc %1\n"
+ " sfpc %2\n"
+ "0: sfpc %1\n"
+ " la %0,0\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : "=d" (rc), "=d" (orig_fpc)
+ : "d" (fpc), "0" (-EINVAL));
+ return rc;
+}
+
+#endif /* _ASM_S390_FPU_API_H */
diff --git a/kernel/arch/s390/include/asm/fpu/internal.h b/kernel/arch/s390/include/asm/fpu/internal.h
new file mode 100644
index 000000000..17d9dcd29
--- /dev/null
+++ b/kernel/arch/s390/include/asm/fpu/internal.h
@@ -0,0 +1,69 @@
+/*
+ * FPU state and register content conversion primitives
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_INTERNAL_H
+#define _ASM_S390_FPU_INTERNAL_H
+
+#include <linux/string.h>
+#include <asm/ctl_reg.h>
+#include <asm/fpu/types.h>
+
+static inline void save_vx_regs_safe(__vector128 *vxrs)
+{
+ unsigned long cr0, flags;
+
+ flags = arch_local_irq_save();
+ __ctl_store(cr0, 0, 0);
+ __ctl_set_bit(0, 17);
+ __ctl_set_bit(0, 18);
+ asm volatile(
+ " la 1,%0\n"
+ " .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */
+ " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */
+ : "=Q" (*(struct vx_array *) vxrs) : : "1");
+ __ctl_load(cr0, 0, 0);
+ arch_local_irq_restore(flags);
+}
+
+static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
+{
+ int i;
+
+ for (i = 0; i < __NUM_FPRS; i++)
+ fprs[i] = *(freg_t *)(vxrs + i);
+}
+
+static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
+{
+ int i;
+
+ for (i = 0; i < __NUM_FPRS; i++)
+ *(freg_t *)(vxrs + i) = fprs[i];
+}
+
+static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+ fpregs->pad = 0;
+ fpregs->fpc = fpu->fpc;
+ if (MACHINE_HAS_VX)
+ convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
+ else
+ memcpy((freg_t *)&fpregs->fprs, fpu->fprs,
+ sizeof(fpregs->fprs));
+}
+
+static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+ fpu->fpc = fpregs->fpc;
+ if (MACHINE_HAS_VX)
+ convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
+ else
+ memcpy(fpu->fprs, (freg_t *)&fpregs->fprs,
+ sizeof(fpregs->fprs));
+}
+
+#endif /* _ASM_S390_FPU_INTERNAL_H */
diff --git a/kernel/arch/s390/include/asm/fpu/types.h b/kernel/arch/s390/include/asm/fpu/types.h
new file mode 100644
index 000000000..14a8b0c14
--- /dev/null
+++ b/kernel/arch/s390/include/asm/fpu/types.h
@@ -0,0 +1,25 @@
+/*
+ * FPU data structures
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_TYPES_H
+#define _ASM_S390_FPU_TYPES_H
+
+#include <asm/sigcontext.h>
+
+struct fpu {
+ __u32 fpc; /* Floating-point control */
+ union {
+ void *regs;
+ freg_t *fprs; /* Floating-point register save area */
+ __vector128 *vxrs; /* Vector register save area */
+ };
+};
+
+/* VX array structure for address operand constraints in inline assemblies */
+struct vx_array { __vector128 _[__NUM_VXRS]; };
+
+#endif /* _ASM_S390_FPU_TYPES_H */
diff --git a/kernel/arch/s390/include/asm/hugetlb.h b/kernel/arch/s390/include/asm/hugetlb.h
index 11eae5f55..d9be7c0c1 100644
--- a/kernel/arch/s390/include/asm/hugetlb.h
+++ b/kernel/arch/s390/include/asm/hugetlb.h
@@ -14,6 +14,7 @@
#define is_hugepage_only_range(mm, addr, len) 0
#define hugetlb_free_pgd_range free_pgd_range
+#define hugepages_supported() (MACHINE_HAS_HPAGE)
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
@@ -35,12 +36,8 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
}
-#define hugetlb_prefault_arch_hook(mm) do { } while (0)
#define arch_clear_hugepage_flags(page) do { } while (0)
-int arch_prepare_hugepage(struct page *page);
-void arch_release_hugepage(struct page *page);
-
static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
diff --git a/kernel/arch/s390/include/asm/idle.h b/kernel/arch/s390/include/asm/idle.h
index 113cd963d..51ff96d9f 100644
--- a/kernel/arch/s390/include/asm/idle.h
+++ b/kernel/arch/s390/include/asm/idle.h
@@ -24,4 +24,6 @@ struct s390_idle_data {
extern struct device_attribute dev_attr_idle_count;
extern struct device_attribute dev_attr_idle_time_us;
+void psw_idle(struct s390_idle_data *, unsigned long);
+
#endif /* _S390_IDLE_H */
diff --git a/kernel/arch/s390/include/asm/io.h b/kernel/arch/s390/include/asm/io.h
index 30fd5c846..437e9af96 100644
--- a/kernel/arch/s390/include/asm/io.h
+++ b/kernel/arch/s390/include/asm/io.h
@@ -29,6 +29,7 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
#define ioremap_nocache(addr, size) ioremap(addr, size)
#define ioremap_wc ioremap_nocache
+#define ioremap_wt ioremap_nocache
static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
{
@@ -56,6 +57,8 @@ static inline void ioport_unmap(void __iomem *p)
*/
#define pci_iomap pci_iomap
#define pci_iounmap pci_iounmap
+#define pci_iomap_wc pci_iomap
+#define pci_iomap_wc_range pci_iomap_range
#define memcpy_fromio(dst, src, count) zpci_memcpy_fromio(dst, src, count)
#define memcpy_toio(dst, src, count) zpci_memcpy_toio(dst, src, count)
diff --git a/kernel/arch/s390/include/asm/ipl.h b/kernel/arch/s390/include/asm/ipl.h
index ece606c2e..86634e71b 100644
--- a/kernel/arch/s390/include/asm/ipl.h
+++ b/kernel/arch/s390/include/asm/ipl.h
@@ -64,7 +64,8 @@ struct ipl_block_fcp {
struct ipl_block_ccw {
u8 reserved1[84];
- u8 reserved2[2];
+ u16 reserved2 : 13;
+ u8 ssid : 3;
u16 devno;
u8 vm_flags;
u8 reserved3[3];
@@ -94,7 +95,6 @@ struct dump_save_areas {
};
extern struct dump_save_areas dump_save_areas;
-struct save_area_ext *dump_save_area_create(int cpu);
extern void do_reipl(void);
extern void do_halt(void);
diff --git a/kernel/arch/s390/include/asm/irq.h b/kernel/arch/s390/include/asm/irq.h
index ff95d15a2..f97b055de 100644
--- a/kernel/arch/s390/include/asm/irq.h
+++ b/kernel/arch/s390/include/asm/irq.h
@@ -47,7 +47,6 @@ enum interruption_class {
IRQEXT_IUC,
IRQEXT_CMS,
IRQEXT_CMC,
- IRQEXT_CMR,
IRQEXT_FTP,
IRQIO_CIO,
IRQIO_QAI,
@@ -96,6 +95,19 @@ enum irq_subclass {
IRQ_SUBCLASS_SERVICE_SIGNAL = 9,
};
+#define CR0_IRQ_SUBCLASS_MASK \
+ ((1UL << (63 - 30)) /* Warning Track */ | \
+ (1UL << (63 - 48)) /* Malfunction Alert */ | \
+ (1UL << (63 - 49)) /* Emergency Signal */ | \
+ (1UL << (63 - 50)) /* External Call */ | \
+ (1UL << (63 - 52)) /* Clock Comparator */ | \
+ (1UL << (63 - 53)) /* CPU Timer */ | \
+ (1UL << (63 - 54)) /* Service Signal */ | \
+ (1UL << (63 - 57)) /* Interrupt Key */ | \
+ (1UL << (63 - 58)) /* Measurement Alert */ | \
+ (1UL << (63 - 59)) /* Timing Alert */ | \
+ (1UL << (63 - 62))) /* IUCV */
+
void irq_subclass_register(enum irq_subclass subclass);
void irq_subclass_unregister(enum irq_subclass subclass);
diff --git a/kernel/arch/s390/include/asm/jump_label.h b/kernel/arch/s390/include/asm/jump_label.h
index 69972b795..7f9fd5e3f 100644
--- a/kernel/arch/s390/include/asm/jump_label.h
+++ b/kernel/arch/s390/include/asm/jump_label.h
@@ -12,14 +12,29 @@
* We use a brcl 0,2 instruction for jump labels at compile time so it
* can be easily distinguished from a hotpatch generated instruction.
*/
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
asm_volatile_goto("0: brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n"
".pushsection __jump_table, \"aw\"\n"
".balign 8\n"
".quad 0b, %l[label], %0\n"
".popsection\n"
- : : "X" (key) : : label);
+ : : "X" (&((char *)key)[branch]) : : label);
+
+ return false;
+label:
+ return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+ asm_volatile_goto("0: brcl 15, %l[label]\n"
+ ".pushsection __jump_table, \"aw\"\n"
+ ".balign 8\n"
+ ".quad 0b, %l[label], %0\n"
+ ".popsection\n"
+ : : "X" (&((char *)key)[branch]) : : label);
+
return false;
label:
return true;
diff --git a/kernel/arch/s390/include/asm/kvm_host.h b/kernel/arch/s390/include/asm/kvm_host.h
index 905007eea..bbdc539fb 100644
--- a/kernel/arch/s390/include/asm/kvm_host.h
+++ b/kernel/arch/s390/include/asm/kvm_host.h
@@ -22,6 +22,7 @@
#include <linux/kvm.h>
#include <asm/debug.h>
#include <asm/cpu.h>
+#include <asm/fpu/api.h>
#include <asm/isc.h>
#define KVM_MAX_VCPUS 64
@@ -34,6 +35,7 @@
*/
#define KVM_NR_IRQCHIPS 1
#define KVM_IRQCHIP_NUM_PINS 4096
+#define KVM_HALT_POLL_NS_DEFAULT 0
#define SIGP_CTRL_C 0x80
#define SIGP_CTRL_SCN_MASK 0x3f
@@ -80,6 +82,7 @@ struct sca_block {
#define CPUSTAT_MCDS 0x00000100
#define CPUSTAT_SM 0x00000080
#define CPUSTAT_IBS 0x00000040
+#define CPUSTAT_GED2 0x00000010
#define CPUSTAT_G 0x00000008
#define CPUSTAT_GED 0x00000004
#define CPUSTAT_J 0x00000002
@@ -95,7 +98,8 @@ struct kvm_s390_sie_block {
#define PROG_IN_SIE (1<<0)
__u32 prog0c; /* 0x000c */
__u8 reserved10[16]; /* 0x0010 */
-#define PROG_BLOCK_SIE 0x00000001
+#define PROG_BLOCK_SIE (1<<0)
+#define PROG_REQUEST (1<<1)
atomic_t prog20; /* 0x0020 */
__u8 reserved24[4]; /* 0x0024 */
__u64 cputm; /* 0x0028 */
@@ -207,6 +211,7 @@ struct kvm_vcpu_stat {
u32 exit_validity;
u32 exit_instruction;
u32 halt_successful_poll;
+ u32 halt_attempted_poll;
u32 halt_wakeup;
u32 instruction_lctl;
u32 instruction_lctlg;
@@ -256,6 +261,9 @@ struct kvm_vcpu_stat {
u32 diagnose_10;
u32 diagnose_44;
u32 diagnose_9c;
+ u32 diagnose_258;
+ u32 diagnose_308;
+ u32 diagnose_500;
};
#define PGM_OPERATION 0x01
@@ -419,7 +427,7 @@ struct kvm_s390_irq_payload {
struct kvm_s390_local_interrupt {
spinlock_t lock;
struct kvm_s390_float_interrupt *float_int;
- struct swait_head *wq;
+ struct swait_queue_head *wq;
atomic_t *cpuflags;
DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
struct kvm_s390_irq_payload irq;
@@ -496,10 +504,8 @@ struct kvm_guestdbg_info_arch {
struct kvm_vcpu_arch {
struct kvm_s390_sie_block *sie_block;
- s390_fp_regs host_fpregs;
unsigned int host_acrs[NUM_ACRS];
- s390_fp_regs guest_fpregs;
- struct kvm_s390_vregs *host_vregs;
+ struct fpu host_fpregs;
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
struct kvm_s390_pgm_info pgm;
@@ -628,15 +634,16 @@ extern char sie_exit;
static inline void kvm_arch_hardware_disable(void) {}
static inline void kvm_arch_check_processor_compat(void *rtn) {}
-static inline void kvm_arch_exit(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
-static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {}
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
#endif
diff --git a/kernel/arch/s390/include/asm/kvm_para.h b/kernel/arch/s390/include/asm/kvm_para.h
index e0f842308..41393052a 100644
--- a/kernel/arch/s390/include/asm/kvm_para.h
+++ b/kernel/arch/s390/include/asm/kvm_para.h
@@ -27,10 +27,9 @@
#define __S390_KVM_PARA_H
#include <uapi/asm/kvm_para.h>
+#include <asm/diag.h>
-
-
-static inline long kvm_hypercall0(unsigned long nr)
+static inline long __kvm_hypercall0(unsigned long nr)
{
register unsigned long __nr asm("1") = nr;
register long __rc asm("2");
@@ -40,7 +39,13 @@ static inline long kvm_hypercall0(unsigned long nr)
return __rc;
}
-static inline long kvm_hypercall1(unsigned long nr, unsigned long p1)
+static inline long kvm_hypercall0(unsigned long nr)
+{
+ diag_stat_inc(DIAG_STAT_X500);
+ return __kvm_hypercall0(nr);
+}
+
+static inline long __kvm_hypercall1(unsigned long nr, unsigned long p1)
{
register unsigned long __nr asm("1") = nr;
register unsigned long __p1 asm("2") = p1;
@@ -51,7 +56,13 @@ static inline long kvm_hypercall1(unsigned long nr, unsigned long p1)
return __rc;
}
-static inline long kvm_hypercall2(unsigned long nr, unsigned long p1,
+static inline long kvm_hypercall1(unsigned long nr, unsigned long p1)
+{
+ diag_stat_inc(DIAG_STAT_X500);
+ return __kvm_hypercall1(nr, p1);
+}
+
+static inline long __kvm_hypercall2(unsigned long nr, unsigned long p1,
unsigned long p2)
{
register unsigned long __nr asm("1") = nr;
@@ -65,7 +76,14 @@ static inline long kvm_hypercall2(unsigned long nr, unsigned long p1,
return __rc;
}
-static inline long kvm_hypercall3(unsigned long nr, unsigned long p1,
+static inline long kvm_hypercall2(unsigned long nr, unsigned long p1,
+ unsigned long p2)
+{
+ diag_stat_inc(DIAG_STAT_X500);
+ return __kvm_hypercall2(nr, p1, p2);
+}
+
+static inline long __kvm_hypercall3(unsigned long nr, unsigned long p1,
unsigned long p2, unsigned long p3)
{
register unsigned long __nr asm("1") = nr;
@@ -80,8 +98,14 @@ static inline long kvm_hypercall3(unsigned long nr, unsigned long p1,
return __rc;
}
+static inline long kvm_hypercall3(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3)
+{
+ diag_stat_inc(DIAG_STAT_X500);
+ return __kvm_hypercall3(nr, p1, p2, p3);
+}
-static inline long kvm_hypercall4(unsigned long nr, unsigned long p1,
+static inline long __kvm_hypercall4(unsigned long nr, unsigned long p1,
unsigned long p2, unsigned long p3,
unsigned long p4)
{
@@ -98,7 +122,15 @@ static inline long kvm_hypercall4(unsigned long nr, unsigned long p1,
return __rc;
}
-static inline long kvm_hypercall5(unsigned long nr, unsigned long p1,
+static inline long kvm_hypercall4(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4)
+{
+ diag_stat_inc(DIAG_STAT_X500);
+ return __kvm_hypercall4(nr, p1, p2, p3, p4);
+}
+
+static inline long __kvm_hypercall5(unsigned long nr, unsigned long p1,
unsigned long p2, unsigned long p3,
unsigned long p4, unsigned long p5)
{
@@ -116,7 +148,15 @@ static inline long kvm_hypercall5(unsigned long nr, unsigned long p1,
return __rc;
}
-static inline long kvm_hypercall6(unsigned long nr, unsigned long p1,
+static inline long kvm_hypercall5(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4, unsigned long p5)
+{
+ diag_stat_inc(DIAG_STAT_X500);
+ return __kvm_hypercall5(nr, p1, p2, p3, p4, p5);
+}
+
+static inline long __kvm_hypercall6(unsigned long nr, unsigned long p1,
unsigned long p2, unsigned long p3,
unsigned long p4, unsigned long p5,
unsigned long p6)
@@ -137,6 +177,15 @@ static inline long kvm_hypercall6(unsigned long nr, unsigned long p1,
return __rc;
}
+static inline long kvm_hypercall6(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4, unsigned long p5,
+ unsigned long p6)
+{
+ diag_stat_inc(DIAG_STAT_X500);
+ return __kvm_hypercall6(nr, p1, p2, p3, p4, p5, p6);
+}
+
/* kvm on s390 is always paravirtualization enabled */
static inline int kvm_para_available(void)
{
diff --git a/kernel/arch/s390/include/asm/linkage.h b/kernel/arch/s390/include/asm/linkage.h
index fc8a82847..27da78cf4 100644
--- a/kernel/arch/s390/include/asm/linkage.h
+++ b/kernel/arch/s390/include/asm/linkage.h
@@ -6,4 +6,26 @@
#define __ALIGN .align 4, 0x07
#define __ALIGN_STR __stringify(__ALIGN)
+#ifndef __ASSEMBLY__
+
+/*
+ * Helper macro for exception table entries
+ */
+#define EX_TABLE(_fault, _target) \
+ ".section __ex_table,\"a\"\n" \
+ ".align 4\n" \
+ ".long (" #_fault ") - .\n" \
+ ".long (" #_target ") - .\n" \
+ ".previous\n"
+
+#else /* __ASSEMBLY__ */
+
+#define EX_TABLE(_fault, _target) \
+ .section __ex_table,"a" ; \
+ .align 4 ; \
+ .long (_fault) - . ; \
+ .long (_target) - . ; \
+ .previous
+
+#endif /* __ASSEMBLY__ */
#endif
diff --git a/kernel/arch/s390/include/asm/lowcore.h b/kernel/arch/s390/include/asm/lowcore.h
index 663f23e37..afe1cfebf 100644
--- a/kernel/arch/s390/include/asm/lowcore.h
+++ b/kernel/arch/s390/include/asm/lowcore.h
@@ -67,7 +67,7 @@ struct _lowcore {
__u8 pad_0x00c4[0x00c8-0x00c4]; /* 0x00c4 */
__u32 stfl_fac_list; /* 0x00c8 */
__u8 pad_0x00cc[0x00e8-0x00cc]; /* 0x00cc */
- __u32 mcck_interruption_code[2]; /* 0x00e8 */
+ __u64 mcck_interruption_code; /* 0x00e8 */
__u8 pad_0x00f0[0x00f4-0x00f0]; /* 0x00f0 */
__u32 external_damage_code; /* 0x00f4 */
__u64 failing_storage_address; /* 0x00f8 */
@@ -132,7 +132,14 @@ struct _lowcore {
/* Address space pointer. */
__u64 kernel_asce; /* 0x0358 */
__u64 user_asce; /* 0x0360 */
- __u64 current_pid; /* 0x0368 */
+
+ /*
+ * The lpp and current_pid fields form a
+ * 64-bit value that is set as program
+ * parameter with the LPP instruction.
+ */
+ __u32 lpp; /* 0x0368 */
+ __u32 current_pid; /* 0x036c */
/* SMP info area */
__u32 cpu_nr; /* 0x0370 */
diff --git a/kernel/arch/s390/include/asm/mmu_context.h b/kernel/arch/s390/include/asm/mmu_context.h
index fb1b93ea3..e485817f7 100644
--- a/kernel/arch/s390/include/asm/mmu_context.h
+++ b/kernel/arch/s390/include/asm/mmu_context.h
@@ -15,17 +15,25 @@
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
+ spin_lock_init(&mm->context.list_lock);
+ INIT_LIST_HEAD(&mm->context.pgtable_list);
+ INIT_LIST_HEAD(&mm->context.gmap_list);
cpumask_clear(&mm->context.cpu_attach_mask);
atomic_set(&mm->context.attach_count, 0);
mm->context.flush_mm = 0;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
- mm->context.asce_bits |= _ASCE_TYPE_REGION3;
#ifdef CONFIG_PGSTE
mm->context.alloc_pgste = page_table_allocate_pgste;
mm->context.has_pgste = 0;
mm->context.use_skey = 0;
#endif
- mm->context.asce_limit = STACK_TOP_MAX;
+ if (mm->context.asce_limit == 0) {
+ /* context created by exec, set asce limit to 4TB */
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
+ mm->context.asce_limit = STACK_TOP_MAX;
+ } else if (mm->context.asce_limit == (1UL << 31)) {
+ mm_inc_nr_pmds(mm);
+ }
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
}
@@ -111,8 +119,6 @@ static inline void activate_mm(struct mm_struct *prev,
static inline void arch_dup_mmap(struct mm_struct *oldmm,
struct mm_struct *mm)
{
- if (oldmm->context.asce_limit < mm->context.asce_limit)
- crst_table_downgrade(mm, oldmm->context.asce_limit);
}
static inline void arch_exit_mmap(struct mm_struct *mm)
diff --git a/kernel/arch/s390/include/asm/mmzone.h b/kernel/arch/s390/include/asm/mmzone.h
new file mode 100644
index 000000000..a9e834e60
--- /dev/null
+++ b/kernel/arch/s390/include/asm/mmzone.h
@@ -0,0 +1,16 @@
+/*
+ * NUMA support for s390
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#ifndef _ASM_S390_MMZONE_H
+#define _ASM_S390_MMZONE_H
+
+#ifdef CONFIG_NUMA
+
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid) (node_data[nid])
+
+#endif /* CONFIG_NUMA */
+#endif /* _ASM_S390_MMZONE_H */
diff --git a/kernel/arch/s390/include/asm/nmi.h b/kernel/arch/s390/include/asm/nmi.h
index 3027a5a72..b75fd9103 100644
--- a/kernel/arch/s390/include/asm/nmi.h
+++ b/kernel/arch/s390/include/asm/nmi.h
@@ -11,51 +11,62 @@
#ifndef _ASM_S390_NMI_H
#define _ASM_S390_NMI_H
+#include <linux/const.h>
#include <linux/types.h>
-struct mci {
- __u32 sd : 1; /* 00 system damage */
- __u32 pd : 1; /* 01 instruction-processing damage */
- __u32 sr : 1; /* 02 system recovery */
- __u32 : 1; /* 03 */
- __u32 cd : 1; /* 04 timing-facility damage */
- __u32 ed : 1; /* 05 external damage */
- __u32 : 1; /* 06 */
- __u32 dg : 1; /* 07 degradation */
- __u32 w : 1; /* 08 warning pending */
- __u32 cp : 1; /* 09 channel-report pending */
- __u32 sp : 1; /* 10 service-processor damage */
- __u32 ck : 1; /* 11 channel-subsystem damage */
- __u32 : 2; /* 12-13 */
- __u32 b : 1; /* 14 backed up */
- __u32 : 1; /* 15 */
- __u32 se : 1; /* 16 storage error uncorrected */
- __u32 sc : 1; /* 17 storage error corrected */
- __u32 ke : 1; /* 18 storage-key error uncorrected */
- __u32 ds : 1; /* 19 storage degradation */
- __u32 wp : 1; /* 20 psw mwp validity */
- __u32 ms : 1; /* 21 psw mask and key validity */
- __u32 pm : 1; /* 22 psw program mask and cc validity */
- __u32 ia : 1; /* 23 psw instruction address validity */
- __u32 fa : 1; /* 24 failing storage address validity */
- __u32 vr : 1; /* 25 vector register validity */
- __u32 ec : 1; /* 26 external damage code validity */
- __u32 fp : 1; /* 27 floating point register validity */
- __u32 gr : 1; /* 28 general register validity */
- __u32 cr : 1; /* 29 control register validity */
- __u32 : 1; /* 30 */
- __u32 st : 1; /* 31 storage logical validity */
- __u32 ie : 1; /* 32 indirect storage error */
- __u32 ar : 1; /* 33 access register validity */
- __u32 da : 1; /* 34 delayed access exception */
- __u32 : 7; /* 35-41 */
- __u32 pr : 1; /* 42 tod programmable register validity */
- __u32 fc : 1; /* 43 fp control register validity */
- __u32 ap : 1; /* 44 ancillary report */
- __u32 : 1; /* 45 */
- __u32 ct : 1; /* 46 cpu timer validity */
- __u32 cc : 1; /* 47 clock comparator validity */
- __u32 : 16; /* 47-63 */
+#define MCCK_CODE_SYSTEM_DAMAGE _BITUL(63)
+#define MCCK_CODE_CPU_TIMER_VALID _BITUL(63 - 46)
+#define MCCK_CODE_PSW_MWP_VALID _BITUL(63 - 20)
+#define MCCK_CODE_PSW_IA_VALID _BITUL(63 - 23)
+
+#ifndef __ASSEMBLY__
+
+union mci {
+ unsigned long val;
+ struct {
+ u64 sd : 1; /* 00 system damage */
+ u64 pd : 1; /* 01 instruction-processing damage */
+ u64 sr : 1; /* 02 system recovery */
+ u64 : 1; /* 03 */
+ u64 cd : 1; /* 04 timing-facility damage */
+ u64 ed : 1; /* 05 external damage */
+ u64 : 1; /* 06 */
+ u64 dg : 1; /* 07 degradation */
+ u64 w : 1; /* 08 warning pending */
+ u64 cp : 1; /* 09 channel-report pending */
+ u64 sp : 1; /* 10 service-processor damage */
+ u64 ck : 1; /* 11 channel-subsystem damage */
+ u64 : 2; /* 12-13 */
+ u64 b : 1; /* 14 backed up */
+ u64 : 1; /* 15 */
+ u64 se : 1; /* 16 storage error uncorrected */
+ u64 sc : 1; /* 17 storage error corrected */
+ u64 ke : 1; /* 18 storage-key error uncorrected */
+ u64 ds : 1; /* 19 storage degradation */
+ u64 wp : 1; /* 20 psw mwp validity */
+ u64 ms : 1; /* 21 psw mask and key validity */
+ u64 pm : 1; /* 22 psw program mask and cc validity */
+ u64 ia : 1; /* 23 psw instruction address validity */
+ u64 fa : 1; /* 24 failing storage address validity */
+ u64 vr : 1; /* 25 vector register validity */
+ u64 ec : 1; /* 26 external damage code validity */
+ u64 fp : 1; /* 27 floating point register validity */
+ u64 gr : 1; /* 28 general register validity */
+ u64 cr : 1; /* 29 control register validity */
+ u64 : 1; /* 30 */
+ u64 st : 1; /* 31 storage logical validity */
+ u64 ie : 1; /* 32 indirect storage error */
+ u64 ar : 1; /* 33 access register validity */
+ u64 da : 1; /* 34 delayed access exception */
+ u64 : 7; /* 35-41 */
+ u64 pr : 1; /* 42 tod programmable register validity */
+ u64 fc : 1; /* 43 fp control register validity */
+ u64 ap : 1; /* 44 ancillary report */
+ u64 : 1; /* 45 */
+ u64 ct : 1; /* 46 cpu timer validity */
+ u64 cc : 1; /* 47 clock comparator validity */
+ u64 : 16; /* 47-63 */
+ };
};
struct pt_regs;
@@ -63,4 +74,5 @@ struct pt_regs;
extern void s390_handle_mcck(void);
extern void s390_do_machine_check(struct pt_regs *regs);
+#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_NMI_H */
diff --git a/kernel/arch/s390/include/asm/numa.h b/kernel/arch/s390/include/asm/numa.h
new file mode 100644
index 000000000..dc19ee0c9
--- /dev/null
+++ b/kernel/arch/s390/include/asm/numa.h
@@ -0,0 +1,35 @@
+/*
+ * NUMA support for s390
+ *
+ * Declare the NUMA core code structures and functions.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#ifndef _ASM_S390_NUMA_H
+#define _ASM_S390_NUMA_H
+
+#ifdef CONFIG_NUMA
+
+#include <linux/numa.h>
+#include <linux/cpumask.h>
+
+void numa_setup(void);
+int numa_pfn_to_nid(unsigned long pfn);
+int __node_distance(int a, int b);
+void numa_update_cpu_topology(void);
+
+extern cpumask_t node_to_cpumask_map[MAX_NUMNODES];
+extern int numa_debug_enabled;
+
+#else
+
+static inline void numa_setup(void) { }
+static inline void numa_update_cpu_topology(void) { }
+static inline int numa_pfn_to_nid(unsigned long pfn)
+{
+ return 0;
+}
+
+#endif /* CONFIG_NUMA */
+#endif /* _ASM_S390_NUMA_H */
diff --git a/kernel/arch/s390/include/asm/pci.h b/kernel/arch/s390/include/asm/pci.h
index a648338c4..c873e682b 100644
--- a/kernel/arch/s390/include/asm/pci.h
+++ b/kernel/arch/s390/include/asm/pci.h
@@ -62,6 +62,8 @@ struct zpci_bar_struct {
u8 size; /* order 2 exponent */
};
+struct s390_domain;
+
/* Private data per function */
struct zpci_dev {
struct pci_dev *pdev;
@@ -118,6 +120,8 @@ struct zpci_dev {
struct dentry *debugfs_dev;
struct dentry *debugfs_perf;
+
+ struct s390_domain *s390_domain; /* s390 IOMMU domain data */
};
static inline bool zdev_enabled(struct zpci_dev *zdev)
@@ -170,7 +174,11 @@ static inline void zpci_exit_slot(struct zpci_dev *zdev) {}
#endif /* CONFIG_HOTPLUG_PCI_S390 */
/* Helpers */
-struct zpci_dev *get_zdev(struct pci_dev *);
+static inline struct zpci_dev *to_zpci(struct pci_dev *pdev)
+{
+ return pdev->sysdata;
+}
+
struct zpci_dev *get_zdev_by_fid(u32);
/* DMA */
@@ -188,4 +196,20 @@ void zpci_debug_init_device(struct zpci_dev *);
void zpci_debug_exit_device(struct zpci_dev *);
void zpci_debug_info(struct zpci_dev *, struct seq_file *);
+#ifdef CONFIG_NUMA
+
+/* Returns the node based on PCI bus */
+static inline int __pcibus_to_node(const struct pci_bus *bus)
+{
+ return NUMA_NO_NODE;
+}
+
+static inline const struct cpumask *
+cpumask_of_pcibus(const struct pci_bus *bus)
+{
+ return cpu_online_mask;
+}
+
+#endif /* CONFIG_NUMA */
+
#endif
diff --git a/kernel/arch/s390/include/asm/pci_dma.h b/kernel/arch/s390/include/asm/pci_dma.h
index 30b4c179c..1aac41e83 100644
--- a/kernel/arch/s390/include/asm/pci_dma.h
+++ b/kernel/arch/s390/include/asm/pci_dma.h
@@ -192,5 +192,10 @@ static inline unsigned long *get_st_pto(unsigned long entry)
/* Prototypes */
int zpci_dma_init_device(struct zpci_dev *);
void zpci_dma_exit_device(struct zpci_dev *);
+void dma_free_seg_table(unsigned long);
+unsigned long *dma_alloc_cpu_table(void);
+void dma_cleanup_tables(unsigned long *);
+unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr);
+void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags);
#endif
diff --git a/kernel/arch/s390/include/asm/perf_event.h b/kernel/arch/s390/include/asm/perf_event.h
index 4cb19fe76..f897ec73d 100644
--- a/kernel/arch/s390/include/asm/perf_event.h
+++ b/kernel/arch/s390/include/asm/perf_event.h
@@ -87,7 +87,15 @@ struct sf_raw_sample {
} __packed;
/* Perf hardware reserve and release functions */
+#ifdef CONFIG_PERF_EVENTS
int perf_reserve_sampling(void);
void perf_release_sampling(void);
+#else /* CONFIG_PERF_EVENTS */
+static inline int perf_reserve_sampling(void)
+{
+ return 0;
+}
+static inline void perf_release_sampling(void) {}
+#endif /* CONFIG_PERF_EVENTS */
#endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/kernel/arch/s390/include/asm/pgalloc.h b/kernel/arch/s390/include/asm/pgalloc.h
index 7b7858f15..d7cc79fb6 100644
--- a/kernel/arch/s390/include/asm/pgalloc.h
+++ b/kernel/arch/s390/include/asm/pgalloc.h
@@ -100,12 +100,26 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- spin_lock_init(&mm->context.list_lock);
- INIT_LIST_HEAD(&mm->context.pgtable_list);
- INIT_LIST_HEAD(&mm->context.gmap_list);
- return (pgd_t *) crst_table_alloc(mm);
+ unsigned long *table = crst_table_alloc(mm);
+
+ if (!table)
+ return NULL;
+ if (mm->context.asce_limit == (1UL << 31)) {
+ /* Forking a compat process with 2 page table levels */
+ if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
+ crst_table_free(mm, table);
+ return NULL;
+ }
+ }
+ return (pgd_t *) table;
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ if (mm->context.asce_limit == (1UL << 31))
+ pgtable_pmd_page_dtor(virt_to_page(pgd));
+ crst_table_free(mm, (unsigned long *) pgd);
}
-#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd)
static inline void pmd_populate(struct mm_struct *mm,
pmd_t *pmd, pgtable_t pte)
diff --git a/kernel/arch/s390/include/asm/pgtable.h b/kernel/arch/s390/include/asm/pgtable.h
index ef24a212e..024f85f94 100644
--- a/kernel/arch/s390/include/asm/pgtable.h
+++ b/kernel/arch/s390/include/asm/pgtable.h
@@ -193,9 +193,15 @@ static inline int is_module_addr(void *addr)
#define _PAGE_UNUSED 0x080 /* SW bit for pgste usage state */
#define __HAVE_ARCH_PTE_SPECIAL
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SOFT_DIRTY 0x002 /* SW pte soft dirty bit */
+#else
+#define _PAGE_SOFT_DIRTY 0x000
+#endif
+
/* Set of bits not changed in pte_modify */
#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_DIRTY | \
- _PAGE_YOUNG)
+ _PAGE_YOUNG | _PAGE_SOFT_DIRTY)
/*
* handle_pte_fault uses pte_present and pte_none to find out the pte type
@@ -285,6 +291,12 @@ static inline int is_module_addr(void *addr)
#define _SEGMENT_ENTRY_READ 0x0002 /* SW segment read bit */
#define _SEGMENT_ENTRY_WRITE 0x0001 /* SW segment write bit */
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _SEGMENT_ENTRY_SOFT_DIRTY 0x4000 /* SW segment soft dirty bit */
+#else
+#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */
+#endif
+
/*
* Segment table entry encoding (R = read-only, I = invalid, y = young bit):
* dy..R...I...wr
@@ -576,6 +588,56 @@ static inline int pte_same(pte_t a, pte_t b)
return pte_val(a) == pte_val(b);
}
+#ifdef CONFIG_NUMA_BALANCING
+static inline int pte_protnone(pte_t pte)
+{
+ return pte_present(pte) && !(pte_val(pte) & _PAGE_READ);
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+ /* pmd_large(pmd) implies pmd_present(pmd) */
+ return pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_READ);
+}
+#endif
+
+static inline int pte_soft_dirty(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SOFT_DIRTY;
+}
+#define pte_swp_soft_dirty pte_soft_dirty
+
+static inline pte_t pte_mksoft_dirty(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SOFT_DIRTY;
+ return pte;
+}
+#define pte_swp_mksoft_dirty pte_mksoft_dirty
+
+static inline pte_t pte_clear_soft_dirty(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_SOFT_DIRTY;
+ return pte;
+}
+#define pte_swp_clear_soft_dirty pte_clear_soft_dirty
+
+static inline int pmd_soft_dirty(pmd_t pmd)
+{
+ return pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY;
+}
+
+static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
+{
+ pmd_val(pmd) |= _SEGMENT_ENTRY_SOFT_DIRTY;
+ return pmd;
+}
+
+static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_SOFT_DIRTY;
+ return pmd;
+}
+
static inline pgste_t pgste_get_lock(pte_t *ptep)
{
unsigned long new = 0;
@@ -876,7 +938,7 @@ static inline pte_t pte_mkclean(pte_t pte)
static inline pte_t pte_mkdirty(pte_t pte)
{
- pte_val(pte) |= _PAGE_DIRTY;
+ pte_val(pte) |= _PAGE_DIRTY | _PAGE_SOFT_DIRTY;
if (pte_val(pte) & _PAGE_WRITE)
pte_val(pte) &= ~_PAGE_PROTECT;
return pte;
@@ -1205,8 +1267,10 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
pte_t entry, int dirty)
{
pgste_t pgste;
+ pte_t oldpte;
- if (pte_same(*ptep, entry))
+ oldpte = *ptep;
+ if (pte_same(oldpte, entry))
return 0;
if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_get_lock(ptep);
@@ -1216,7 +1280,8 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
ptep_flush_direct(vma->vm_mm, address, ptep);
if (mm_has_pgste(vma->vm_mm)) {
- pgste_set_key(ptep, pgste, entry, vma->vm_mm);
+ if (pte_val(oldpte) & _PAGE_INVALID)
+ pgste_set_key(ptep, pgste, entry, vma->vm_mm);
pgste = pgste_set_pte(ptep, pgste, entry);
pgste_set_unlock(ptep, pgste);
} else
@@ -1327,7 +1392,8 @@ static inline pmd_t pmd_mkclean(pmd_t pmd)
static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
if (pmd_large(pmd)) {
- pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY |
+ _SEGMENT_ENTRY_SOFT_DIRTY;
if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
}
@@ -1358,7 +1424,8 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
if (pmd_large(pmd)) {
pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
_SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
- _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SPLIT;
+ _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SPLIT |
+ _SEGMENT_ENTRY_SOFT_DIRTY;
pmd_val(pmd) |= massage_pgprot_pmd(newprot);
if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
@@ -1498,9 +1565,9 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
return pmd_young(pmd);
}
-#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
-static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
- unsigned long address, pmd_t *pmdp)
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
{
pmd_t pmd = *pmdp;
@@ -1509,10 +1576,10 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
return pmd;
}
-#define __HAVE_ARCH_PMDP_GET_AND_CLEAR_FULL
-static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm,
- unsigned long address,
- pmd_t *pmdp, int full)
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
+static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
+ unsigned long address,
+ pmd_t *pmdp, int full)
{
pmd_t pmd = *pmdp;
@@ -1522,11 +1589,11 @@ static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm,
return pmd;
}
-#define __HAVE_ARCH_PMDP_CLEAR_FLUSH
-static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
+#define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
+static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
{
- return pmdp_get_and_clear(vma->vm_mm, address, pmdp);
+ return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
}
#define __HAVE_ARCH_PMDP_INVALIDATE
@@ -1548,6 +1615,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
}
}
+static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
+ unsigned long address,
+ pmd_t *pmdp)
+{
+ return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
+}
+#define pmdp_collapse_flush pmdp_collapse_flush
+
#define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot))
#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
@@ -1565,9 +1640,9 @@ static inline int has_transparent_hugepage(void)
/*
* 64 bit swap entry format:
* A page-table entry has some bits we have to treat in a special way.
- * Bits 52 and bit 55 have to be zero, otherwise an specification
+ * Bits 52 and bit 55 have to be zero, otherwise a specification
* exception will occur instead of a page translation exception. The
- * specifiation exception has the bad habit not to store necessary
+ * specification exception has the bad habit not to store necessary
* information in the lowcore.
* Bits 54 and 63 are used to indicate the page type.
* A swap pte is indicated by bit pattern (pte & 0x201) == 0x200
diff --git a/kernel/arch/s390/include/asm/processor.h b/kernel/arch/s390/include/asm/processor.h
index dedb62185..b16c3d0a1 100644
--- a/kernel/arch/s390/include/asm/processor.h
+++ b/kernel/arch/s390/include/asm/processor.h
@@ -11,13 +11,19 @@
#ifndef __ASM_S390_PROCESSOR_H
#define __ASM_S390_PROCESSOR_H
+#include <linux/const.h>
+
#define CIF_MCCK_PENDING 0 /* machine check handling is pending */
#define CIF_ASCE 1 /* user asce needs fixup / uaccess */
#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
+#define CIF_FPU 3 /* restore FPU registers */
+#define CIF_IGNORE_IRQ 4 /* ignore interrupt (for udelay) */
-#define _CIF_MCCK_PENDING (1<<CIF_MCCK_PENDING)
-#define _CIF_ASCE (1<<CIF_ASCE)
-#define _CIF_NOHZ_DELAY (1<<CIF_NOHZ_DELAY)
+#define _CIF_MCCK_PENDING _BITUL(CIF_MCCK_PENDING)
+#define _CIF_ASCE _BITUL(CIF_ASCE)
+#define _CIF_NOHZ_DELAY _BITUL(CIF_NOHZ_DELAY)
+#define _CIF_FPU _BITUL(CIF_FPU)
+#define _CIF_IGNORE_IRQ _BITUL(CIF_IGNORE_IRQ)
#ifndef __ASSEMBLY__
@@ -28,20 +34,22 @@
#include <asm/ptrace.h>
#include <asm/setup.h>
#include <asm/runtime_instr.h>
+#include <asm/fpu/types.h>
+#include <asm/fpu/internal.h>
static inline void set_cpu_flag(int flag)
{
- S390_lowcore.cpu_flags |= (1U << flag);
+ S390_lowcore.cpu_flags |= (1UL << flag);
}
static inline void clear_cpu_flag(int flag)
{
- S390_lowcore.cpu_flags &= ~(1U << flag);
+ S390_lowcore.cpu_flags &= ~(1UL << flag);
}
static inline int test_cpu_flag(int flag)
{
- return !!(S390_lowcore.cpu_flags & (1U << flag));
+ return !!(S390_lowcore.cpu_flags & (1UL << flag));
}
#define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
@@ -85,7 +93,7 @@ typedef struct {
* Thread structure
*/
struct thread_struct {
- s390_fp_regs fp_regs;
+ struct fpu fpu; /* FP and VX register save area */
unsigned int acrs[NUM_ACRS];
unsigned long ksp; /* kernel stack pointer */
mm_segment_t mm_segment;
@@ -99,9 +107,7 @@ struct thread_struct {
struct list_head list;
/* cpu runtime instrumentation */
struct runtime_instr_cb *ri_cb;
- int ri_signum;
unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
- __vector128 *vxrs; /* Vector register save area */
};
/* Flag to disable transactions. */
@@ -137,8 +143,10 @@ struct stack_frame {
#define ARCH_MIN_TASKALIGN 8
+extern __vector128 init_task_fpu_regs[__NUM_VXRS];
#define INIT_THREAD { \
.ksp = sizeof(init_stack) + (unsigned long) &init_stack, \
+ .fpu.regs = (void *)&init_task_fpu_regs, \
}
/*
@@ -215,7 +223,7 @@ static inline void __load_psw(psw_t psw)
* Set PSW mask to specified value, while leaving the
* PSW addr pointing to the next instruction.
*/
-static inline void __load_psw_mask (unsigned long mask)
+static inline void __load_psw_mask(unsigned long mask)
{
unsigned long addr;
psw_t psw;
@@ -231,6 +239,27 @@ static inline void __load_psw_mask (unsigned long mask)
}
/*
+ * Extract current PSW mask
+ */
+static inline unsigned long __extract_psw(void)
+{
+ unsigned int reg1, reg2;
+
+ asm volatile("epsw %0,%1" : "=d" (reg1), "=a" (reg2));
+ return (((unsigned long) reg1) << 32) | ((unsigned long) reg2);
+}
+
+static inline void local_mcck_enable(void)
+{
+ __load_psw_mask(__extract_psw() | PSW_MASK_MCHECK);
+}
+
+static inline void local_mcck_disable(void)
+{
+ __load_psw_mask(__extract_psw() & ~PSW_MASK_MCHECK);
+}
+
+/*
* Rewind PSW instruction address by specified number of bytes.
*/
static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc)
@@ -253,65 +282,14 @@ void enabled_wait(void);
*/
static inline void __noreturn disabled_wait(unsigned long code)
{
- unsigned long ctl_buf;
- psw_t dw_psw;
-
- dw_psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA;
- dw_psw.addr = code;
- /*
- * Store status and then load disabled wait psw,
- * the processor is dead afterwards
- */
- asm volatile(
- " stctg 0,0,0(%2)\n"
- " ni 4(%2),0xef\n" /* switch off protection */
- " lctlg 0,0,0(%2)\n"
- " lghi 1,0x1000\n"
- " stpt 0x328(1)\n" /* store timer */
- " stckc 0x330(1)\n" /* store clock comparator */
- " stpx 0x318(1)\n" /* store prefix register */
- " stam 0,15,0x340(1)\n"/* store access registers */
- " stfpc 0x31c(1)\n" /* store fpu control */
- " std 0,0x200(1)\n" /* store f0 */
- " std 1,0x208(1)\n" /* store f1 */
- " std 2,0x210(1)\n" /* store f2 */
- " std 3,0x218(1)\n" /* store f3 */
- " std 4,0x220(1)\n" /* store f4 */
- " std 5,0x228(1)\n" /* store f5 */
- " std 6,0x230(1)\n" /* store f6 */
- " std 7,0x238(1)\n" /* store f7 */
- " std 8,0x240(1)\n" /* store f8 */
- " std 9,0x248(1)\n" /* store f9 */
- " std 10,0x250(1)\n" /* store f10 */
- " std 11,0x258(1)\n" /* store f11 */
- " std 12,0x260(1)\n" /* store f12 */
- " std 13,0x268(1)\n" /* store f13 */
- " std 14,0x270(1)\n" /* store f14 */
- " std 15,0x278(1)\n" /* store f15 */
- " stmg 0,15,0x280(1)\n"/* store general registers */
- " stctg 0,15,0x380(1)\n"/* store control registers */
- " oi 0x384(1),0x10\n"/* fake protection bit */
- " lpswe 0(%1)"
- : "=m" (ctl_buf)
- : "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc", "0", "1");
- while (1);
-}
+ psw_t psw;
-/*
- * Use to set psw mask except for the first byte which
- * won't be changed by this function.
- */
-static inline void
-__set_psw_mask(unsigned long mask)
-{
- __load_psw_mask(mask | (arch_local_save_flags() & ~(-1UL >> 8)));
+ psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA;
+ psw.addr = code;
+ __load_psw(psw);
+ while (1);
}
-#define local_mcck_enable() \
- __set_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_MCHECK)
-#define local_mcck_disable() \
- __set_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT)
-
/*
* Basic Machine Check/Program Check Handler.
*/
@@ -336,25 +314,6 @@ extern void memcpy_absolute(void *, void *, size_t);
memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \
}
-/*
- * Helper macro for exception table entries
- */
-#define EX_TABLE(_fault, _target) \
- ".section __ex_table,\"a\"\n" \
- ".align 4\n" \
- ".long (" #_fault ") - .\n" \
- ".long (" #_target ") - .\n" \
- ".previous\n"
-
-#else /* __ASSEMBLY__ */
-
-#define EX_TABLE(_fault, _target) \
- .section __ex_table,"a" ; \
- .align 4 ; \
- .long (_fault) - . ; \
- .long (_target) - . ; \
- .previous
-
#endif /* __ASSEMBLY__ */
#endif /* __ASM_S390_PROCESSOR_H */
diff --git a/kernel/arch/s390/include/asm/ptrace.h b/kernel/arch/s390/include/asm/ptrace.h
index 6feda2599..37cbc5094 100644
--- a/kernel/arch/s390/include/asm/ptrace.h
+++ b/kernel/arch/s390/include/asm/ptrace.h
@@ -6,13 +6,14 @@
#ifndef _S390_PTRACE_H
#define _S390_PTRACE_H
+#include <linux/const.h>
#include <uapi/asm/ptrace.h>
#define PIF_SYSCALL 0 /* inside a system call */
#define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */
-#define _PIF_SYSCALL (1<<PIF_SYSCALL)
-#define _PIF_PER_TRAP (1<<PIF_PER_TRAP)
+#define _PIF_SYSCALL _BITUL(PIF_SYSCALL)
+#define _PIF_PER_TRAP _BITUL(PIF_PER_TRAP)
#ifndef __ASSEMBLY__
@@ -128,17 +129,17 @@ struct per_struct_kernel {
static inline void set_pt_regs_flag(struct pt_regs *regs, int flag)
{
- regs->flags |= (1U << flag);
+ regs->flags |= (1UL << flag);
}
static inline void clear_pt_regs_flag(struct pt_regs *regs, int flag)
{
- regs->flags &= ~(1U << flag);
+ regs->flags &= ~(1UL << flag);
}
static inline int test_pt_regs_flag(struct pt_regs *regs, int flag)
{
- return !!(regs->flags & (1U << flag));
+ return !!(regs->flags & (1UL << flag));
}
/*
diff --git a/kernel/arch/s390/include/asm/sclp.h b/kernel/arch/s390/include/asm/sclp.h
index f1096bab5..821dde5f4 100644
--- a/kernel/arch/s390/include/asm/sclp.h
+++ b/kernel/arch/s390/include/asm/sclp.h
@@ -11,6 +11,7 @@
#include <asm/cpu.h>
#define SCLP_CHP_INFO_MASK_SIZE 32
+#define SCLP_MAX_CORES 256
struct sclp_chp_info {
u8 recognized[SCLP_CHP_INFO_MASK_SIZE];
@@ -26,7 +27,7 @@ struct sclp_ipl_info {
char loadparm[LOADPARM_LEN];
};
-struct sclp_cpu_entry {
+struct sclp_core_entry {
u8 core_id;
u8 reserved0[2];
u8 : 3;
@@ -38,41 +39,46 @@ struct sclp_cpu_entry {
u8 reserved1;
} __attribute__((packed));
-struct sclp_cpu_info {
+struct sclp_core_info {
unsigned int configured;
unsigned int standby;
unsigned int combined;
- int has_cpu_type;
- struct sclp_cpu_entry cpu[MAX_CPU_ADDRESS + 1];
+ struct sclp_core_entry core[SCLP_MAX_CORES];
};
-int sclp_get_cpu_info(struct sclp_cpu_info *info);
-int sclp_cpu_configure(u8 cpu);
-int sclp_cpu_deconfigure(u8 cpu);
-unsigned long long sclp_get_rnmax(void);
-unsigned long long sclp_get_rzm(void);
-unsigned int sclp_get_max_cpu(void);
-unsigned int sclp_get_mtid(u8 cpu_type);
-unsigned int sclp_get_mtid_max(void);
-unsigned int sclp_get_mtid_prev(void);
+struct sclp_info {
+ unsigned char has_linemode : 1;
+ unsigned char has_vt220 : 1;
+ unsigned char has_siif : 1;
+ unsigned char has_sigpif : 1;
+ unsigned char has_core_type : 1;
+ unsigned char has_sprp : 1;
+ unsigned int ibc;
+ unsigned int mtid;
+ unsigned int mtid_cp;
+ unsigned int mtid_prev;
+ unsigned long long rzm;
+ unsigned long long rnmax;
+ unsigned long long hamax;
+ unsigned int max_cores;
+ unsigned long hsa_size;
+ unsigned long long facilities;
+};
+extern struct sclp_info sclp;
+
+int sclp_get_core_info(struct sclp_core_info *info);
+int sclp_core_configure(u8 core);
+int sclp_core_deconfigure(u8 core);
int sclp_sdias_blk_count(void);
int sclp_sdias_copy(void *dest, int blk_num, int nr_blks);
int sclp_chp_configure(struct chp_id chpid);
int sclp_chp_deconfigure(struct chp_id chpid);
int sclp_chp_read_info(struct sclp_chp_info *info);
void sclp_get_ipl_info(struct sclp_ipl_info *info);
-bool __init sclp_has_linemode(void);
-bool __init sclp_has_vt220(void);
-bool sclp_has_sprp(void);
int sclp_pci_configure(u32 fid);
int sclp_pci_deconfigure(u32 fid);
int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
-unsigned long sclp_get_hsa_size(void);
void sclp_early_detect(void);
-int sclp_has_siif(void);
-int sclp_has_sigpif(void);
-unsigned int sclp_get_ibc(void);
-
-long _sclp_print_early(const char *);
+int _sclp_print_early(const char *);
#endif /* _ASM_S390_SCLP_H */
diff --git a/kernel/arch/s390/include/asm/setup.h b/kernel/arch/s390/include/asm/setup.h
index b8ffc1bd0..23537661d 100644
--- a/kernel/arch/s390/include/asm/setup.h
+++ b/kernel/arch/s390/include/asm/setup.h
@@ -5,11 +5,38 @@
#ifndef _ASM_S390_SETUP_H
#define _ASM_S390_SETUP_H
+#include <linux/const.h>
#include <uapi/asm/setup.h>
#define PARMAREA 0x10400
+/*
+ * Machine features detected in head.S
+ */
+
+#define MACHINE_FLAG_VM _BITUL(0)
+#define MACHINE_FLAG_IEEE _BITUL(1)
+#define MACHINE_FLAG_CSP _BITUL(2)
+#define MACHINE_FLAG_MVPG _BITUL(3)
+#define MACHINE_FLAG_DIAG44 _BITUL(4)
+#define MACHINE_FLAG_IDTE _BITUL(5)
+#define MACHINE_FLAG_DIAG9C _BITUL(6)
+#define MACHINE_FLAG_KVM _BITUL(8)
+#define MACHINE_FLAG_ESOP _BITUL(9)
+#define MACHINE_FLAG_EDAT1 _BITUL(10)
+#define MACHINE_FLAG_EDAT2 _BITUL(11)
+#define MACHINE_FLAG_LPAR _BITUL(12)
+#define MACHINE_FLAG_LPP _BITUL(13)
+#define MACHINE_FLAG_TOPOLOGY _BITUL(14)
+#define MACHINE_FLAG_TE _BITUL(15)
+#define MACHINE_FLAG_TLB_LC _BITUL(17)
+#define MACHINE_FLAG_VX _BITUL(18)
+#define MACHINE_FLAG_CAD _BITUL(19)
+
+#define LPP_MAGIC _BITUL(31)
+#define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL)
+
#ifndef __ASSEMBLY__
#include <asm/lowcore.h>
@@ -28,29 +55,6 @@ extern unsigned long max_physmem_end;
extern void detect_memory_memblock(void);
-/*
- * Machine features detected in head.S
- */
-
-#define MACHINE_FLAG_VM (1UL << 0)
-#define MACHINE_FLAG_IEEE (1UL << 1)
-#define MACHINE_FLAG_CSP (1UL << 2)
-#define MACHINE_FLAG_MVPG (1UL << 3)
-#define MACHINE_FLAG_DIAG44 (1UL << 4)
-#define MACHINE_FLAG_IDTE (1UL << 5)
-#define MACHINE_FLAG_DIAG9C (1UL << 6)
-#define MACHINE_FLAG_KVM (1UL << 8)
-#define MACHINE_FLAG_ESOP (1UL << 9)
-#define MACHINE_FLAG_EDAT1 (1UL << 10)
-#define MACHINE_FLAG_EDAT2 (1UL << 11)
-#define MACHINE_FLAG_LPAR (1UL << 12)
-#define MACHINE_FLAG_LPP (1UL << 13)
-#define MACHINE_FLAG_TOPOLOGY (1UL << 14)
-#define MACHINE_FLAG_TE (1UL << 15)
-#define MACHINE_FLAG_TLB_LC (1UL << 17)
-#define MACHINE_FLAG_VX (1UL << 18)
-#define MACHINE_FLAG_CAD (1UL << 19)
-
#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM)
#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
#define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR)
diff --git a/kernel/arch/s390/include/asm/smp.h b/kernel/arch/s390/include/asm/smp.h
index b3bd0282d..5df26b11c 100644
--- a/kernel/arch/s390/include/asm/smp.h
+++ b/kernel/arch/s390/include/asm/smp.h
@@ -29,6 +29,7 @@ extern void smp_call_ipl_cpu(void (*func)(void *), void *);
extern int smp_find_processor_id(u16 address);
extern int smp_store_status(int cpu);
+extern void smp_save_dump_cpus(void);
extern int smp_vcpu_scheduled(int cpu);
extern void smp_yield_cpu(int cpu);
extern void smp_cpu_set_polarization(int cpu, int val);
@@ -54,6 +55,7 @@ static inline int smp_store_status(int cpu) { return 0; }
static inline int smp_vcpu_scheduled(int cpu) { return 1; }
static inline void smp_yield_cpu(int cpu) { }
static inline void smp_fill_possible_mask(void) { }
+static inline void smp_save_dump_cpus(void) { }
#endif /* CONFIG_SMP */
diff --git a/kernel/arch/s390/include/asm/spinlock.h b/kernel/arch/s390/include/asm/spinlock.h
index 0e37cd041..63ebf37d3 100644
--- a/kernel/arch/s390/include/asm/spinlock.h
+++ b/kernel/arch/s390/include/asm/spinlock.h
@@ -87,7 +87,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
{
typecheck(unsigned int, lp->lock);
asm volatile(
- __ASM_BARRIER
"st %1,%0\n"
: "+Q" (lp->lock)
: "d" (0)
@@ -169,7 +168,6 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw)
\
typecheck(unsigned int *, ptr); \
asm volatile( \
- "bcr 14,0\n" \
op_string " %0,%2,%1\n" \
: "=d" (old_val), "+Q" (*ptr) \
: "d" (op_val) \
@@ -243,7 +241,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
rw->owner = 0;
asm volatile(
- __ASM_BARRIER
"st %1,%0\n"
: "+Q" (rw->lock)
: "d" (0)
diff --git a/kernel/arch/s390/include/asm/switch_to.h b/kernel/arch/s390/include/asm/switch_to.h
index d62e7a696..12d45f0cf 100644
--- a/kernel/arch/s390/include/asm/switch_to.h
+++ b/kernel/arch/s390/include/asm/switch_to.h
@@ -8,139 +8,12 @@
#define __ASM_SWITCH_TO_H
#include <linux/thread_info.h>
+#include <asm/fpu/api.h>
#include <asm/ptrace.h>
extern struct task_struct *__switch_to(void *, void *);
extern void update_cr_regs(struct task_struct *task);
-static inline int test_fp_ctl(u32 fpc)
-{
- u32 orig_fpc;
- int rc;
-
- asm volatile(
- " efpc %1\n"
- " sfpc %2\n"
- "0: sfpc %1\n"
- " la %0,0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "=d" (rc), "=d" (orig_fpc)
- : "d" (fpc), "0" (-EINVAL));
- return rc;
-}
-
-static inline void save_fp_ctl(u32 *fpc)
-{
- asm volatile(
- " stfpc %0\n"
- : "+Q" (*fpc));
-}
-
-static inline int restore_fp_ctl(u32 *fpc)
-{
- int rc;
-
- asm volatile(
- " lfpc %1\n"
- "0: la %0,0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "=d" (rc) : "Q" (*fpc), "0" (-EINVAL));
- return rc;
-}
-
-static inline void save_fp_regs(freg_t *fprs)
-{
- asm volatile("std 0,%0" : "=Q" (fprs[0]));
- asm volatile("std 2,%0" : "=Q" (fprs[2]));
- asm volatile("std 4,%0" : "=Q" (fprs[4]));
- asm volatile("std 6,%0" : "=Q" (fprs[6]));
- asm volatile("std 1,%0" : "=Q" (fprs[1]));
- asm volatile("std 3,%0" : "=Q" (fprs[3]));
- asm volatile("std 5,%0" : "=Q" (fprs[5]));
- asm volatile("std 7,%0" : "=Q" (fprs[7]));
- asm volatile("std 8,%0" : "=Q" (fprs[8]));
- asm volatile("std 9,%0" : "=Q" (fprs[9]));
- asm volatile("std 10,%0" : "=Q" (fprs[10]));
- asm volatile("std 11,%0" : "=Q" (fprs[11]));
- asm volatile("std 12,%0" : "=Q" (fprs[12]));
- asm volatile("std 13,%0" : "=Q" (fprs[13]));
- asm volatile("std 14,%0" : "=Q" (fprs[14]));
- asm volatile("std 15,%0" : "=Q" (fprs[15]));
-}
-
-static inline void restore_fp_regs(freg_t *fprs)
-{
- asm volatile("ld 0,%0" : : "Q" (fprs[0]));
- asm volatile("ld 2,%0" : : "Q" (fprs[2]));
- asm volatile("ld 4,%0" : : "Q" (fprs[4]));
- asm volatile("ld 6,%0" : : "Q" (fprs[6]));
- asm volatile("ld 1,%0" : : "Q" (fprs[1]));
- asm volatile("ld 3,%0" : : "Q" (fprs[3]));
- asm volatile("ld 5,%0" : : "Q" (fprs[5]));
- asm volatile("ld 7,%0" : : "Q" (fprs[7]));
- asm volatile("ld 8,%0" : : "Q" (fprs[8]));
- asm volatile("ld 9,%0" : : "Q" (fprs[9]));
- asm volatile("ld 10,%0" : : "Q" (fprs[10]));
- asm volatile("ld 11,%0" : : "Q" (fprs[11]));
- asm volatile("ld 12,%0" : : "Q" (fprs[12]));
- asm volatile("ld 13,%0" : : "Q" (fprs[13]));
- asm volatile("ld 14,%0" : : "Q" (fprs[14]));
- asm volatile("ld 15,%0" : : "Q" (fprs[15]));
-}
-
-static inline void save_vx_regs(__vector128 *vxrs)
-{
- typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
-
- asm volatile(
- " la 1,%0\n"
- " .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */
- " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */
- : "=Q" (*(addrtype *) vxrs) : : "1");
-}
-
-static inline void save_vx_regs_safe(__vector128 *vxrs)
-{
- unsigned long cr0, flags;
-
- flags = arch_local_irq_save();
- __ctl_store(cr0, 0, 0);
- __ctl_set_bit(0, 17);
- __ctl_set_bit(0, 18);
- save_vx_regs(vxrs);
- __ctl_load(cr0, 0, 0);
- arch_local_irq_restore(flags);
-}
-
-static inline void restore_vx_regs(__vector128 *vxrs)
-{
- typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
-
- asm volatile(
- " la 1,%0\n"
- " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */
- " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */
- : : "Q" (*(addrtype *) vxrs) : "1");
-}
-
-static inline void save_fp_vx_regs(struct task_struct *task)
-{
- if (task->thread.vxrs)
- save_vx_regs(task->thread.vxrs);
- else
- save_fp_regs(task->thread.fp_regs.fprs);
-}
-
-static inline void restore_fp_vx_regs(struct task_struct *task)
-{
- if (task->thread.vxrs)
- restore_vx_regs(task->thread.vxrs);
- else
- restore_fp_regs(task->thread.fp_regs.fprs);
-}
-
static inline void save_access_regs(unsigned int *acrs)
{
typedef struct { int _[NUM_ACRS]; } acrstype;
@@ -157,15 +30,13 @@ static inline void restore_access_regs(unsigned int *acrs)
#define switch_to(prev,next,last) do { \
if (prev->mm) { \
- save_fp_ctl(&prev->thread.fp_regs.fpc); \
- save_fp_vx_regs(prev); \
+ save_fpu_regs(); \
save_access_regs(&prev->thread.acrs[0]); \
save_ri_cb(prev->thread.ri_cb); \
} \
if (next->mm) { \
update_cr_regs(next); \
- restore_fp_ctl(&next->thread.fp_regs.fpc); \
- restore_fp_vx_regs(next); \
+ set_cpu_flag(CIF_FPU); \
restore_access_regs(&next->thread.acrs[0]); \
restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
} \
diff --git a/kernel/arch/s390/include/asm/thread_info.h b/kernel/arch/s390/include/asm/thread_info.h
index 4c27ec764..692b9247c 100644
--- a/kernel/arch/s390/include/asm/thread_info.h
+++ b/kernel/arch/s390/include/asm/thread_info.h
@@ -7,6 +7,8 @@
#ifndef _ASM_THREAD_INFO_H
#define _ASM_THREAD_INFO_H
+#include <linux/const.h>
+
/*
* Size of kernel stack for each process
*/
@@ -83,16 +85,16 @@ void arch_release_task_struct(struct task_struct *tsk);
#define TIF_BLOCK_STEP 20 /* This task is block stepped */
#define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */
-#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
-#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
-#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
-#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
-#define _TIF_SECCOMP (1<<TIF_SECCOMP)
-#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
-#define _TIF_UPROBE (1<<TIF_UPROBE)
-#define _TIF_31BIT (1<<TIF_31BIT)
-#define _TIF_SINGLE_STEP (1<<TIF_SINGLE_STEP)
+#define _TIF_NOTIFY_RESUME _BITUL(TIF_NOTIFY_RESUME)
+#define _TIF_SIGPENDING _BITUL(TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED _BITUL(TIF_NEED_RESCHED)
+#define _TIF_SYSCALL_TRACE _BITUL(TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT _BITUL(TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP _BITUL(TIF_SECCOMP)
+#define _TIF_SYSCALL_TRACEPOINT _BITUL(TIF_SYSCALL_TRACEPOINT)
+#define _TIF_UPROBE _BITUL(TIF_UPROBE)
+#define _TIF_31BIT _BITUL(TIF_31BIT)
+#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP)
#define is_32bit_task() (test_thread_flag(TIF_31BIT))
diff --git a/kernel/arch/s390/include/asm/timex.h b/kernel/arch/s390/include/asm/timex.h
index 98eb2a579..dcb6312a0 100644
--- a/kernel/arch/s390/include/asm/timex.h
+++ b/kernel/arch/s390/include/asm/timex.h
@@ -10,6 +10,7 @@
#define _ASM_S390_TIMEX_H
#include <asm/lowcore.h>
+#include <linux/time64.h>
/* The value of the TOD clock for 1.1.1970. */
#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL
@@ -108,10 +109,10 @@ int get_sync_clock(unsigned long long *clock);
void init_cpu_timer(void);
unsigned long long monotonic_clock(void);
-void tod_to_timeval(__u64, struct timespec *);
+void tod_to_timeval(__u64 todval, struct timespec64 *xt);
static inline
-void stck_to_timespec(unsigned long long stck, struct timespec *ts)
+void stck_to_timespec64(unsigned long long stck, struct timespec64 *ts)
{
tod_to_timeval(stck - TOD_UNIX_EPOCH, ts);
}
diff --git a/kernel/arch/s390/include/asm/topology.h b/kernel/arch/s390/include/asm/topology.h
index b1453a2ae..94fc55fc7 100644
--- a/kernel/arch/s390/include/asm/topology.h
+++ b/kernel/arch/s390/include/asm/topology.h
@@ -2,6 +2,7 @@
#define _ASM_S390_TOPOLOGY_H
#include <linux/cpumask.h>
+#include <asm/numa.h>
struct sysinfo_15_1_x;
struct cpu;
@@ -13,6 +14,7 @@ struct cpu_topology_s390 {
unsigned short core_id;
unsigned short socket_id;
unsigned short book_id;
+ unsigned short node_id;
cpumask_t thread_mask;
cpumask_t core_mask;
cpumask_t book_mask;
@@ -22,7 +24,8 @@ DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology);
#define topology_physical_package_id(cpu) (per_cpu(cpu_topology, cpu).socket_id)
#define topology_thread_id(cpu) (per_cpu(cpu_topology, cpu).thread_id)
-#define topology_thread_cpumask(cpu) (&per_cpu(cpu_topology, cpu).thread_mask)
+#define topology_sibling_cpumask(cpu) \
+ (&per_cpu(cpu_topology, cpu).thread_mask)
#define topology_core_id(cpu) (per_cpu(cpu_topology, cpu).core_id)
#define topology_core_cpumask(cpu) (&per_cpu(cpu_topology, cpu).core_mask)
#define topology_book_id(cpu) (per_cpu(cpu_topology, cpu).book_id)
@@ -51,6 +54,43 @@ static inline void topology_expect_change(void) { }
#define POLARIZATION_VM (2)
#define POLARIZATION_VH (3)
+#define SD_BOOK_INIT SD_CPU_INIT
+
+#ifdef CONFIG_NUMA
+
+#define cpu_to_node cpu_to_node
+static inline int cpu_to_node(int cpu)
+{
+ return per_cpu(cpu_topology, cpu).node_id;
+}
+
+/* Returns a pointer to the cpumask of CPUs on node 'node'. */
+#define cpumask_of_node cpumask_of_node
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+ return &node_to_cpumask_map[node];
+}
+
+/*
+ * Returns the number of the node containing node 'node'. This
+ * architecture is flat, so it is a pretty simple function!
+ */
+#define parent_node(node) (node)
+
+#define pcibus_to_node(bus) __pcibus_to_node(bus)
+
+#define node_distance(a, b) __node_distance(a, b)
+
+#else /* !CONFIG_NUMA */
+
+#define numa_node_id numa_node_id
+static inline int numa_node_id(void)
+{
+ return 0;
+}
+
+#endif /* CONFIG_NUMA */
+
#include <asm-generic/topology.h>
#endif /* _ASM_S390_TOPOLOGY_H */
diff --git a/kernel/arch/s390/include/asm/trace/diag.h b/kernel/arch/s390/include/asm/trace/diag.h
new file mode 100644
index 000000000..cc6cfe788
--- /dev/null
+++ b/kernel/arch/s390/include/asm/trace/diag.h
@@ -0,0 +1,43 @@
+/*
+ * Tracepoint header for s390 diagnose calls
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM s390
+
+#if !defined(_TRACE_S390_DIAG_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_S390_DIAG_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm/trace
+#define TRACE_INCLUDE_FILE diag
+
+TRACE_EVENT(s390_diagnose,
+ TP_PROTO(unsigned short nr),
+ TP_ARGS(nr),
+ TP_STRUCT__entry(
+ __field(unsigned short, nr)
+ ),
+ TP_fast_assign(
+ __entry->nr = nr;
+ ),
+ TP_printk("nr=0x%x", __entry->nr)
+);
+
+#ifdef CONFIG_TRACEPOINTS
+void trace_s390_diagnose_norecursion(int diag_nr);
+#else
+static inline void trace_s390_diagnose_norecursion(int diag_nr) { }
+#endif
+
+#endif /* _TRACE_S390_DIAG_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/arch/s390/include/asm/unistd.h b/kernel/arch/s390/include/asm/unistd.h
index 91f56b1d8..02613bad8 100644
--- a/kernel/arch/s390/include/asm/unistd.h
+++ b/kernel/arch/s390/include/asm/unistd.h
@@ -8,20 +8,8 @@
#include <uapi/asm/unistd.h>
-
#define __IGNORE_time
-/* Ignore NUMA system calls. Not wired up on s390. */
-#define __IGNORE_mbind
-#define __IGNORE_get_mempolicy
-#define __IGNORE_set_mempolicy
-#define __IGNORE_migrate_pages
-#define __IGNORE_move_pages
-
-/* Ignore system calls that are also reachable via sys_socket */
-#define __IGNORE_recvmmsg
-#define __IGNORE_sendmmsg
-
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_SYS_ALARM
#define __ARCH_WANT_SYS_GETHOSTNAME
diff --git a/kernel/arch/s390/include/asm/vx-insn.h b/kernel/arch/s390/include/asm/vx-insn.h
new file mode 100644
index 000000000..4a3135620
--- /dev/null
+++ b/kernel/arch/s390/include/asm/vx-insn.h
@@ -0,0 +1,480 @@
+/*
+ * Support for Vector Instructions
+ *
+ * Assembler macros to generate .byte/.word code for particular
+ * vector instructions that are supported by recent binutils (>= 2.26) only.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_S390_VX_INSN_H
+#define __ASM_S390_VX_INSN_H
+
+#ifdef __ASSEMBLY__
+
+
+/* Macros to generate vector instruction byte code */
+
+#define REG_NUM_INVALID 255
+
+/* GR_NUM - Retrieve general-purpose register number
+ *
+ * @opd: Operand to store register number
+ * @r64: String designation register in the format "%rN"
+ */
+.macro GR_NUM opd gr
+ \opd = REG_NUM_INVALID
+ .ifc \gr,%r0
+ \opd = 0
+ .endif
+ .ifc \gr,%r1
+ \opd = 1
+ .endif
+ .ifc \gr,%r2
+ \opd = 2
+ .endif
+ .ifc \gr,%r3
+ \opd = 3
+ .endif
+ .ifc \gr,%r4
+ \opd = 4
+ .endif
+ .ifc \gr,%r5
+ \opd = 5
+ .endif
+ .ifc \gr,%r6
+ \opd = 6
+ .endif
+ .ifc \gr,%r7
+ \opd = 7
+ .endif
+ .ifc \gr,%r8
+ \opd = 8
+ .endif
+ .ifc \gr,%r9
+ \opd = 9
+ .endif
+ .ifc \gr,%r10
+ \opd = 10
+ .endif
+ .ifc \gr,%r11
+ \opd = 11
+ .endif
+ .ifc \gr,%r12
+ \opd = 12
+ .endif
+ .ifc \gr,%r13
+ \opd = 13
+ .endif
+ .ifc \gr,%r14
+ \opd = 14
+ .endif
+ .ifc \gr,%r15
+ \opd = 15
+ .endif
+ .if \opd == REG_NUM_INVALID
+ .error "Invalid general-purpose register designation: \gr"
+ .endif
+.endm
+
+/* VX_R() - Macro to encode the VX_NUM into the instruction */
+#define VX_R(v) (v & 0x0F)
+
+/* VX_NUM - Retrieve vector register number
+ *
+ * @opd: Operand to store register number
+ * @vxr: String designation register in the format "%vN"
+ *
+ * The vector register number is used for as input number to the
+ * instruction and, as well as, to compute the RXB field of the
+ * instruction. To encode the particular vector register number,
+ * use the VX_R(v) macro to extract the instruction opcode.
+ */
+.macro VX_NUM opd vxr
+ \opd = REG_NUM_INVALID
+ .ifc \vxr,%v0
+ \opd = 0
+ .endif
+ .ifc \vxr,%v1
+ \opd = 1
+ .endif
+ .ifc \vxr,%v2
+ \opd = 2
+ .endif
+ .ifc \vxr,%v3
+ \opd = 3
+ .endif
+ .ifc \vxr,%v4
+ \opd = 4
+ .endif
+ .ifc \vxr,%v5
+ \opd = 5
+ .endif
+ .ifc \vxr,%v6
+ \opd = 6
+ .endif
+ .ifc \vxr,%v7
+ \opd = 7
+ .endif
+ .ifc \vxr,%v8
+ \opd = 8
+ .endif
+ .ifc \vxr,%v9
+ \opd = 9
+ .endif
+ .ifc \vxr,%v10
+ \opd = 10
+ .endif
+ .ifc \vxr,%v11
+ \opd = 11
+ .endif
+ .ifc \vxr,%v12
+ \opd = 12
+ .endif
+ .ifc \vxr,%v13
+ \opd = 13
+ .endif
+ .ifc \vxr,%v14
+ \opd = 14
+ .endif
+ .ifc \vxr,%v15
+ \opd = 15
+ .endif
+ .ifc \vxr,%v16
+ \opd = 16
+ .endif
+ .ifc \vxr,%v17
+ \opd = 17
+ .endif
+ .ifc \vxr,%v18
+ \opd = 18
+ .endif
+ .ifc \vxr,%v19
+ \opd = 19
+ .endif
+ .ifc \vxr,%v20
+ \opd = 20
+ .endif
+ .ifc \vxr,%v21
+ \opd = 21
+ .endif
+ .ifc \vxr,%v22
+ \opd = 22
+ .endif
+ .ifc \vxr,%v23
+ \opd = 23
+ .endif
+ .ifc \vxr,%v24
+ \opd = 24
+ .endif
+ .ifc \vxr,%v25
+ \opd = 25
+ .endif
+ .ifc \vxr,%v26
+ \opd = 26
+ .endif
+ .ifc \vxr,%v27
+ \opd = 27
+ .endif
+ .ifc \vxr,%v28
+ \opd = 28
+ .endif
+ .ifc \vxr,%v29
+ \opd = 29
+ .endif
+ .ifc \vxr,%v30
+ \opd = 30
+ .endif
+ .ifc \vxr,%v31
+ \opd = 31
+ .endif
+ .if \opd == REG_NUM_INVALID
+ .error "Invalid vector register designation: \vxr"
+ .endif
+.endm
+
+/* RXB - Compute most significant bit used vector registers
+ *
+ * @rxb: Operand to store computed RXB value
+ * @v1: First vector register designated operand
+ * @v2: Second vector register designated operand
+ * @v3: Third vector register designated operand
+ * @v4: Fourth vector register designated operand
+ */
+.macro RXB rxb v1 v2=0 v3=0 v4=0
+ \rxb = 0
+ .if \v1 & 0x10
+ \rxb = \rxb | 0x08
+ .endif
+ .if \v2 & 0x10
+ \rxb = \rxb | 0x04
+ .endif
+ .if \v3 & 0x10
+ \rxb = \rxb | 0x02
+ .endif
+ .if \v4 & 0x10
+ \rxb = \rxb | 0x01
+ .endif
+.endm
+
+/* MRXB - Generate Element Size Control and RXB value
+ *
+ * @m: Element size control
+ * @v1: First vector register designated operand (for RXB)
+ * @v2: Second vector register designated operand (for RXB)
+ * @v3: Third vector register designated operand (for RXB)
+ * @v4: Fourth vector register designated operand (for RXB)
+ */
+.macro MRXB m v1 v2=0 v3=0 v4=0
+ rxb = 0
+ RXB rxb, \v1, \v2, \v3, \v4
+ .byte (\m << 4) | rxb
+.endm
+
+/* MRXBOPC - Generate Element Size Control, RXB, and final Opcode fields
+ *
+ * @m: Element size control
+ * @opc: Opcode
+ * @v1: First vector register designated operand (for RXB)
+ * @v2: Second vector register designated operand (for RXB)
+ * @v3: Third vector register designated operand (for RXB)
+ * @v4: Fourth vector register designated operand (for RXB)
+ */
+.macro MRXBOPC m opc v1 v2=0 v3=0 v4=0
+ MRXB \m, \v1, \v2, \v3, \v4
+ .byte \opc
+.endm
+
+/* Vector support instructions */
+
+/* VECTOR GENERATE BYTE MASK */
+.macro VGBM vr imm2
+ VX_NUM v1, \vr
+ .word (0xE700 | (VX_R(v1) << 4))
+ .word \imm2
+ MRXBOPC 0, 0x44, v1
+.endm
+.macro VZERO vxr
+ VGBM \vxr, 0
+.endm
+.macro VONE vxr
+ VGBM \vxr, 0xFFFF
+.endm
+
+/* VECTOR LOAD VR ELEMENT FROM GR */
+.macro VLVG v, gr, disp, m
+ VX_NUM v1, \v
+ GR_NUM b2, "%r0"
+ GR_NUM r3, \gr
+ .word 0xE700 | (VX_R(v1) << 4) | r3
+ .word (b2 << 12) | (\disp)
+ MRXBOPC \m, 0x22, v1
+.endm
+.macro VLVGB v, gr, index, base
+ VLVG \v, \gr, \index, \base, 0
+.endm
+.macro VLVGH v, gr, index
+ VLVG \v, \gr, \index, 1
+.endm
+.macro VLVGF v, gr, index
+ VLVG \v, \gr, \index, 2
+.endm
+.macro VLVGG v, gr, index
+ VLVG \v, \gr, \index, 3
+.endm
+
+/* VECTOR LOAD */
+.macro VL v, disp, index="%r0", base
+ VX_NUM v1, \v
+ GR_NUM x2, \index
+ GR_NUM b2, \base
+ .word 0xE700 | (VX_R(v1) << 4) | x2
+ .word (b2 << 12) | (\disp)
+ MRXBOPC 0, 0x06, v1
+.endm
+
+/* VECTOR LOAD ELEMENT */
+.macro VLEx vr1, disp, index="%r0", base, m3, opc
+ VX_NUM v1, \vr1
+ GR_NUM x2, \index
+ GR_NUM b2, \base
+ .word 0xE700 | (VX_R(v1) << 4) | x2
+ .word (b2 << 12) | (\disp)
+ MRXBOPC \m3, \opc, v1
+.endm
+.macro VLEB vr1, disp, index="%r0", base, m3
+ VLEx \vr1, \disp, \index, \base, \m3, 0x00
+.endm
+.macro VLEH vr1, disp, index="%r0", base, m3
+ VLEx \vr1, \disp, \index, \base, \m3, 0x01
+.endm
+.macro VLEF vr1, disp, index="%r0", base, m3
+ VLEx \vr1, \disp, \index, \base, \m3, 0x03
+.endm
+.macro VLEG vr1, disp, index="%r0", base, m3
+ VLEx \vr1, \disp, \index, \base, \m3, 0x02
+.endm
+
+/* VECTOR LOAD ELEMENT IMMEDIATE */
+.macro VLEIx vr1, imm2, m3, opc
+ VX_NUM v1, \vr1
+ .word 0xE700 | (VX_R(v1) << 4)
+ .word \imm2
+ MRXBOPC \m3, \opc, v1
+.endm
+.macro VLEIB vr1, imm2, index
+ VLEIx \vr1, \imm2, \index, 0x40
+.endm
+.macro VLEIH vr1, imm2, index
+ VLEIx \vr1, \imm2, \index, 0x41
+.endm
+.macro VLEIF vr1, imm2, index
+ VLEIx \vr1, \imm2, \index, 0x43
+.endm
+.macro VLEIG vr1, imm2, index
+ VLEIx \vr1, \imm2, \index, 0x42
+.endm
+
+/* VECTOR LOAD GR FROM VR ELEMENT */
+.macro VLGV gr, vr, disp, base="%r0", m
+ GR_NUM r1, \gr
+ GR_NUM b2, \base
+ VX_NUM v3, \vr
+ .word 0xE700 | (r1 << 4) | VX_R(v3)
+ .word (b2 << 12) | (\disp)
+ MRXBOPC \m, 0x21, v3
+.endm
+.macro VLGVB gr, vr, disp, base="%r0"
+ VLGV \gr, \vr, \disp, \base, 0
+.endm
+.macro VLGVH gr, vr, disp, base="%r0"
+ VLGV \gr, \vr, \disp, \base, 1
+.endm
+.macro VLGVF gr, vr, disp, base="%r0"
+ VLGV \gr, \vr, \disp, \base, 2
+.endm
+.macro VLGVG gr, vr, disp, base="%r0"
+ VLGV \gr, \vr, \disp, \base, 3
+.endm
+
+/* VECTOR LOAD MULTIPLE */
+.macro VLM vfrom, vto, disp, base
+ VX_NUM v1, \vfrom
+ VX_NUM v3, \vto
+ GR_NUM b2, \base /* Base register */
+ .word 0xE700 | (VX_R(v1) << 4) | VX_R(v3)
+ .word (b2 << 12) | (\disp)
+ MRXBOPC 0, 0x36, v1, v3
+.endm
+
+/* VECTOR STORE MULTIPLE */
+.macro VSTM vfrom, vto, disp, base
+ VX_NUM v1, \vfrom
+ VX_NUM v3, \vto
+ GR_NUM b2, \base /* Base register */
+ .word 0xE700 | (VX_R(v1) << 4) | VX_R(v3)
+ .word (b2 << 12) | (\disp)
+ MRXBOPC 0, 0x3E, v1, v3
+.endm
+
+/* VECTOR PERMUTE */
+.macro VPERM vr1, vr2, vr3, vr4
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ VX_NUM v4, \vr4
+ .word 0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+ .word (VX_R(v3) << 12)
+ MRXBOPC VX_R(v4), 0x8C, v1, v2, v3, v4
+.endm
+
+/* VECTOR UNPACK LOGICAL LOW */
+.macro VUPLL vr1, vr2, m3
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ .word 0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+ .word 0x0000
+ MRXBOPC \m3, 0xD4, v1, v2
+.endm
+.macro VUPLLB vr1, vr2
+ VUPLL \vr1, \vr2, 0
+.endm
+.macro VUPLLH vr1, vr2
+ VUPLL \vr1, \vr2, 1
+.endm
+.macro VUPLLF vr1, vr2
+ VUPLL \vr1, \vr2, 2
+.endm
+
+
+/* Vector integer instructions */
+
+/* VECTOR EXCLUSIVE OR */
+.macro VX vr1, vr2, vr3
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ .word 0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+ .word (VX_R(v3) << 12)
+ MRXBOPC 0, 0x6D, v1, v2, v3
+.endm
+
+/* VECTOR GALOIS FIELD MULTIPLY SUM */
+.macro VGFM vr1, vr2, vr3, m4
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ .word 0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+ .word (VX_R(v3) << 12)
+ MRXBOPC \m4, 0xB4, v1, v2, v3
+.endm
+.macro VGFMB vr1, vr2, vr3
+ VGFM \vr1, \vr2, \vr3, 0
+.endm
+.macro VGFMH vr1, vr2, vr3
+ VGFM \vr1, \vr2, \vr3, 1
+.endm
+.macro VGFMF vr1, vr2, vr3
+ VGFM \vr1, \vr2, \vr3, 2
+.endm
+.macro VGFMG vr1, vr2, vr3
+ VGFM \vr1, \vr2, \vr3, 3
+.endm
+
+/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */
+.macro VGFMA vr1, vr2, vr3, vr4, m5
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ VX_NUM v4, \vr4
+ .word 0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+ .word (VX_R(v3) << 12) | (\m5 << 8)
+ MRXBOPC VX_R(v4), 0xBC, v1, v2, v3, v4
+.endm
+.macro VGFMAB vr1, vr2, vr3, vr4
+ VGFMA \vr1, \vr2, \vr3, \vr4, 0
+.endm
+.macro VGFMAH vr1, vr2, vr3, vr4
+ VGFMA \vr1, \vr2, \vr3, \vr4, 1
+.endm
+.macro VGFMAF vr1, vr2, vr3, vr4
+ VGFMA \vr1, \vr2, \vr3, \vr4, 2
+.endm
+.macro VGFMAG vr1, vr2, vr3, vr4
+ VGFMA \vr1, \vr2, \vr3, \vr4, 3
+.endm
+
+/* VECTOR SHIFT RIGHT LOGICAL BY BYTE */
+.macro VSRLB vr1, vr2, vr3
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ .word 0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+ .word (VX_R(v3) << 12)
+ MRXBOPC 0, 0x7D, v1, v2, v3
+.endm
+
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_S390_VX_INSN_H */
diff --git a/kernel/arch/s390/include/uapi/asm/unistd.h b/kernel/arch/s390/include/uapi/asm/unistd.h
index 67878af25..34ec20247 100644
--- a/kernel/arch/s390/include/uapi/asm/unistd.h
+++ b/kernel/arch/s390/include/uapi/asm/unistd.h
@@ -192,21 +192,21 @@
#define __NR_set_tid_address 252
#define __NR_fadvise64 253
#define __NR_timer_create 254
-#define __NR_timer_settime (__NR_timer_create+1)
-#define __NR_timer_gettime (__NR_timer_create+2)
-#define __NR_timer_getoverrun (__NR_timer_create+3)
-#define __NR_timer_delete (__NR_timer_create+4)
-#define __NR_clock_settime (__NR_timer_create+5)
-#define __NR_clock_gettime (__NR_timer_create+6)
-#define __NR_clock_getres (__NR_timer_create+7)
-#define __NR_clock_nanosleep (__NR_timer_create+8)
+#define __NR_timer_settime 255
+#define __NR_timer_gettime 256
+#define __NR_timer_getoverrun 257
+#define __NR_timer_delete 258
+#define __NR_clock_settime 259
+#define __NR_clock_gettime 260
+#define __NR_clock_getres 261
+#define __NR_clock_nanosleep 262
/* Number 263 is reserved for vserver */
#define __NR_statfs64 265
#define __NR_fstatfs64 266
#define __NR_remap_file_pages 267
-/* Number 268 is reserved for new sys_mbind */
-/* Number 269 is reserved for new sys_get_mempolicy */
-/* Number 270 is reserved for new sys_set_mempolicy */
+#define __NR_mbind 268
+#define __NR_get_mempolicy 269
+#define __NR_set_mempolicy 270
#define __NR_mq_open 271
#define __NR_mq_unlink 272
#define __NR_mq_timedsend 273
@@ -223,7 +223,7 @@
#define __NR_inotify_init 284
#define __NR_inotify_add_watch 285
#define __NR_inotify_rm_watch 286
-/* Number 287 is reserved for new sys_migrate_pages */
+#define __NR_migrate_pages 287
#define __NR_openat 288
#define __NR_mkdirat 289
#define __NR_mknodat 290
@@ -245,7 +245,7 @@
#define __NR_sync_file_range 307
#define __NR_tee 308
#define __NR_vmsplice 309
-/* Number 310 is reserved for new sys_move_pages */
+#define __NR_move_pages 310
#define __NR_getcpu 311
#define __NR_epoll_pwait 312
#define __NR_utimes 313
@@ -290,7 +290,27 @@
#define __NR_s390_pci_mmio_write 352
#define __NR_s390_pci_mmio_read 353
#define __NR_execveat 354
-#define NR_syscalls 355
+#define __NR_userfaultfd 355
+#define __NR_membarrier 356
+#define __NR_recvmmsg 357
+#define __NR_sendmmsg 358
+#define __NR_socket 359
+#define __NR_socketpair 360
+#define __NR_bind 361
+#define __NR_connect 362
+#define __NR_listen 363
+#define __NR_accept4 364
+#define __NR_getsockopt 365
+#define __NR_setsockopt 366
+#define __NR_getsockname 367
+#define __NR_getpeername 368
+#define __NR_sendto 369
+#define __NR_sendmsg 370
+#define __NR_recvfrom 371
+#define __NR_recvmsg 372
+#define __NR_shutdown 373
+#define __NR_mlock2 374
+#define NR_syscalls 375
/*
* There are some system calls that are not present on 64 bit, some
diff --git a/kernel/arch/s390/kernel/Makefile b/kernel/arch/s390/kernel/Makefile
index ffb87617a..dc167a23b 100644
--- a/kernel/arch/s390/kernel/Makefile
+++ b/kernel/arch/s390/kernel/Makefile
@@ -28,6 +28,17 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
CFLAGS_sysinfo.o += -w
+#
+# Use -march=z900 for sclp.c to be able to print an error message if
+# the kernel is started on a machine which is too old
+#
+CFLAGS_REMOVE_sclp.o = $(CC_FLAGS_FTRACE)
+ifneq ($(CC_FLAGS_MARCH),-march=z900)
+CFLAGS_REMOVE_sclp.o += $(CC_FLAGS_MARCH)
+CFLAGS_sclp.o += -march=z900
+endif
+GCOV_PROFILE_sclp.o := n
+
obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o
@@ -55,6 +66,8 @@ obj-$(CONFIG_UPROBES) += uprobes.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o
+obj-$(CONFIG_TRACEPOINTS) += trace.o
+
# vdso
obj-y += vdso64/
obj-$(CONFIG_COMPAT) += vdso32/
diff --git a/kernel/arch/s390/kernel/asm-offsets.c b/kernel/arch/s390/kernel/asm-offsets.c
index c7d1b9d09..dc6c9c604 100644
--- a/kernel/arch/s390/kernel/asm-offsets.c
+++ b/kernel/arch/s390/kernel/asm-offsets.c
@@ -23,56 +23,64 @@
int main(void)
{
- DEFINE(__THREAD_info, offsetof(struct task_struct, stack));
- DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp));
- DEFINE(__THREAD_mm_segment, offsetof(struct task_struct, thread.mm_segment));
+ /* task struct offsets */
+ OFFSET(__TASK_thread_info, task_struct, stack);
+ OFFSET(__TASK_thread, task_struct, thread);
+ OFFSET(__TASK_pid, task_struct, pid);
BLANK();
- DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
+ /* thread struct offsets */
+ OFFSET(__THREAD_ksp, thread_struct, ksp);
+ OFFSET(__THREAD_FPU_fpc, thread_struct, fpu.fpc);
+ OFFSET(__THREAD_FPU_regs, thread_struct, fpu.regs);
+ OFFSET(__THREAD_per_cause, thread_struct, per_event.cause);
+ OFFSET(__THREAD_per_address, thread_struct, per_event.address);
+ OFFSET(__THREAD_per_paid, thread_struct, per_event.paid);
+ OFFSET(__THREAD_trap_tdb, thread_struct, trap_tdb);
BLANK();
- DEFINE(__THREAD_per_cause, offsetof(struct task_struct, thread.per_event.cause));
- DEFINE(__THREAD_per_address, offsetof(struct task_struct, thread.per_event.address));
- DEFINE(__THREAD_per_paid, offsetof(struct task_struct, thread.per_event.paid));
+ /* thread info offsets */
+ OFFSET(__TI_task, thread_info, task);
+ OFFSET(__TI_flags, thread_info, flags);
+ OFFSET(__TI_sysc_table, thread_info, sys_call_table);
+ OFFSET(__TI_cpu, thread_info, cpu);
+ OFFSET(__TI_precount, thread_info, preempt_count);
+ OFFSET(__TI_user_timer, thread_info, user_timer);
+ OFFSET(__TI_system_timer, thread_info, system_timer);
+ OFFSET(__TI_last_break, thread_info, last_break);
BLANK();
- DEFINE(__TI_task, offsetof(struct thread_info, task));
- DEFINE(__TI_flags, offsetof(struct thread_info, flags));
- DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table));
- DEFINE(__TI_cpu, offsetof(struct thread_info, cpu));
- DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count));
- DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer));
- DEFINE(__TI_system_timer, offsetof(struct thread_info, system_timer));
- DEFINE(__TI_last_break, offsetof(struct thread_info, last_break));
- BLANK();
- DEFINE(__PT_ARGS, offsetof(struct pt_regs, args));
- DEFINE(__PT_PSW, offsetof(struct pt_regs, psw));
- DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs));
- DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2));
- DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code));
- DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm));
- DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long));
- DEFINE(__PT_FLAGS, offsetof(struct pt_regs, flags));
+ /* pt_regs offsets */
+ OFFSET(__PT_ARGS, pt_regs, args);
+ OFFSET(__PT_PSW, pt_regs, psw);
+ OFFSET(__PT_GPRS, pt_regs, gprs);
+ OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2);
+ OFFSET(__PT_INT_CODE, pt_regs, int_code);
+ OFFSET(__PT_INT_PARM, pt_regs, int_parm);
+ OFFSET(__PT_INT_PARM_LONG, pt_regs, int_parm_long);
+ OFFSET(__PT_FLAGS, pt_regs, flags);
DEFINE(__PT_SIZE, sizeof(struct pt_regs));
BLANK();
- DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain));
- DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs));
- DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1));
+ /* stack_frame offsets */
+ OFFSET(__SF_BACKCHAIN, stack_frame, back_chain);
+ OFFSET(__SF_GPRS, stack_frame, gprs);
+ OFFSET(__SF_EMPTY, stack_frame, empty1);
BLANK();
/* timeval/timezone offsets for use by vdso */
- DEFINE(__VDSO_UPD_COUNT, offsetof(struct vdso_data, tb_update_count));
- DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp));
- DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec));
- DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec));
- DEFINE(__VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
- DEFINE(__VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
- DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec));
- DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
- DEFINE(__VDSO_WTOM_CRS_SEC, offsetof(struct vdso_data, wtom_coarse_sec));
- DEFINE(__VDSO_WTOM_CRS_NSEC, offsetof(struct vdso_data, wtom_coarse_nsec));
- DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest));
- DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available));
- DEFINE(__VDSO_TK_MULT, offsetof(struct vdso_data, tk_mult));
- DEFINE(__VDSO_TK_SHIFT, offsetof(struct vdso_data, tk_shift));
- DEFINE(__VDSO_ECTG_BASE, offsetof(struct vdso_per_cpu_data, ectg_timer_base));
- DEFINE(__VDSO_ECTG_USER, offsetof(struct vdso_per_cpu_data, ectg_user_time));
+ OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count);
+ OFFSET(__VDSO_XTIME_STAMP, vdso_data, xtime_tod_stamp);
+ OFFSET(__VDSO_XTIME_SEC, vdso_data, xtime_clock_sec);
+ OFFSET(__VDSO_XTIME_NSEC, vdso_data, xtime_clock_nsec);
+ OFFSET(__VDSO_XTIME_CRS_SEC, vdso_data, xtime_coarse_sec);
+ OFFSET(__VDSO_XTIME_CRS_NSEC, vdso_data, xtime_coarse_nsec);
+ OFFSET(__VDSO_WTOM_SEC, vdso_data, wtom_clock_sec);
+ OFFSET(__VDSO_WTOM_NSEC, vdso_data, wtom_clock_nsec);
+ OFFSET(__VDSO_WTOM_CRS_SEC, vdso_data, wtom_coarse_sec);
+ OFFSET(__VDSO_WTOM_CRS_NSEC, vdso_data, wtom_coarse_nsec);
+ OFFSET(__VDSO_TIMEZONE, vdso_data, tz_minuteswest);
+ OFFSET(__VDSO_ECTG_OK, vdso_data, ectg_available);
+ OFFSET(__VDSO_TK_MULT, vdso_data, tk_mult);
+ OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift);
+ OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base);
+ OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time);
+ BLANK();
/* constants used by the vdso */
DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC);
@@ -83,102 +91,106 @@ int main(void)
DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC);
BLANK();
/* idle data offsets */
- DEFINE(__CLOCK_IDLE_ENTER, offsetof(struct s390_idle_data, clock_idle_enter));
- DEFINE(__CLOCK_IDLE_EXIT, offsetof(struct s390_idle_data, clock_idle_exit));
- DEFINE(__TIMER_IDLE_ENTER, offsetof(struct s390_idle_data, timer_idle_enter));
- DEFINE(__TIMER_IDLE_EXIT, offsetof(struct s390_idle_data, timer_idle_exit));
- /* lowcore offsets */
- DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params));
- DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr));
- DEFINE(__LC_EXT_INT_CODE, offsetof(struct _lowcore, ext_int_code));
- DEFINE(__LC_SVC_ILC, offsetof(struct _lowcore, svc_ilc));
- DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, svc_code));
- DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc));
- DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code));
- DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code));
- DEFINE(__LC_MON_CLASS_NR, offsetof(struct _lowcore, mon_class_num));
- DEFINE(__LC_PER_CODE, offsetof(struct _lowcore, per_code));
- DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_atmid));
- DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address));
- DEFINE(__LC_EXC_ACCESS_ID, offsetof(struct _lowcore, exc_access_id));
- DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id));
- DEFINE(__LC_OP_ACCESS_ID, offsetof(struct _lowcore, op_access_id));
- DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id));
- DEFINE(__LC_MON_CODE, offsetof(struct _lowcore, monitor_code));
- DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id));
- DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr));
- DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm));
- DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word));
- DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list));
- DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code));
- DEFINE(__LC_MCCK_EXT_DAM_CODE, offsetof(struct _lowcore, external_damage_code));
- DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw));
- DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw));
- DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw));
- DEFINE(__LC_PGM_OLD_PSW, offsetof(struct _lowcore, program_old_psw));
- DEFINE(__LC_MCK_OLD_PSW, offsetof(struct _lowcore, mcck_old_psw));
- DEFINE(__LC_IO_OLD_PSW, offsetof(struct _lowcore, io_old_psw));
- DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw));
- DEFINE(__LC_EXT_NEW_PSW, offsetof(struct _lowcore, external_new_psw));
- DEFINE(__LC_SVC_NEW_PSW, offsetof(struct _lowcore, svc_new_psw));
- DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw));
- DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw));
- DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw));
+ OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter);
+ OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit);
+ OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter);
+ OFFSET(__TIMER_IDLE_EXIT, s390_idle_data, timer_idle_exit);
BLANK();
- DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync));
- DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async));
- DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart));
- DEFINE(__LC_CPU_FLAGS, offsetof(struct _lowcore, cpu_flags));
- DEFINE(__LC_RETURN_PSW, offsetof(struct _lowcore, return_psw));
- DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw));
- DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer));
- DEFINE(__LC_ASYNC_ENTER_TIMER, offsetof(struct _lowcore, async_enter_timer));
- DEFINE(__LC_MCCK_ENTER_TIMER, offsetof(struct _lowcore, mcck_enter_timer));
- DEFINE(__LC_EXIT_TIMER, offsetof(struct _lowcore, exit_timer));
- DEFINE(__LC_USER_TIMER, offsetof(struct _lowcore, user_timer));
- DEFINE(__LC_SYSTEM_TIMER, offsetof(struct _lowcore, system_timer));
- DEFINE(__LC_STEAL_TIMER, offsetof(struct _lowcore, steal_timer));
- DEFINE(__LC_LAST_UPDATE_TIMER, offsetof(struct _lowcore, last_update_timer));
- DEFINE(__LC_LAST_UPDATE_CLOCK, offsetof(struct _lowcore, last_update_clock));
- DEFINE(__LC_CURRENT, offsetof(struct _lowcore, current_task));
- DEFINE(__LC_CURRENT_PID, offsetof(struct _lowcore, current_pid));
- DEFINE(__LC_THREAD_INFO, offsetof(struct _lowcore, thread_info));
- DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack));
- DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack));
- DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack));
- DEFINE(__LC_RESTART_STACK, offsetof(struct _lowcore, restart_stack));
- DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn));
- DEFINE(__LC_RESTART_DATA, offsetof(struct _lowcore, restart_data));
- DEFINE(__LC_RESTART_SOURCE, offsetof(struct _lowcore, restart_source));
- DEFINE(__LC_KERNEL_ASCE, offsetof(struct _lowcore, kernel_asce));
- DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce));
- DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock));
- DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock));
- DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags));
- DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib));
+ /* hardware defined lowcore locations 0x000 - 0x1ff */
+ OFFSET(__LC_EXT_PARAMS, _lowcore, ext_params);
+ OFFSET(__LC_EXT_CPU_ADDR, _lowcore, ext_cpu_addr);
+ OFFSET(__LC_EXT_INT_CODE, _lowcore, ext_int_code);
+ OFFSET(__LC_SVC_ILC, _lowcore, svc_ilc);
+ OFFSET(__LC_SVC_INT_CODE, _lowcore, svc_code);
+ OFFSET(__LC_PGM_ILC, _lowcore, pgm_ilc);
+ OFFSET(__LC_PGM_INT_CODE, _lowcore, pgm_code);
+ OFFSET(__LC_DATA_EXC_CODE, _lowcore, data_exc_code);
+ OFFSET(__LC_MON_CLASS_NR, _lowcore, mon_class_num);
+ OFFSET(__LC_PER_CODE, _lowcore, per_code);
+ OFFSET(__LC_PER_ATMID, _lowcore, per_atmid);
+ OFFSET(__LC_PER_ADDRESS, _lowcore, per_address);
+ OFFSET(__LC_EXC_ACCESS_ID, _lowcore, exc_access_id);
+ OFFSET(__LC_PER_ACCESS_ID, _lowcore, per_access_id);
+ OFFSET(__LC_OP_ACCESS_ID, _lowcore, op_access_id);
+ OFFSET(__LC_AR_MODE_ID, _lowcore, ar_mode_id);
+ OFFSET(__LC_TRANS_EXC_CODE, _lowcore, trans_exc_code);
+ OFFSET(__LC_MON_CODE, _lowcore, monitor_code);
+ OFFSET(__LC_SUBCHANNEL_ID, _lowcore, subchannel_id);
+ OFFSET(__LC_SUBCHANNEL_NR, _lowcore, subchannel_nr);
+ OFFSET(__LC_IO_INT_PARM, _lowcore, io_int_parm);
+ OFFSET(__LC_IO_INT_WORD, _lowcore, io_int_word);
+ OFFSET(__LC_STFL_FAC_LIST, _lowcore, stfl_fac_list);
+ OFFSET(__LC_MCCK_CODE, _lowcore, mcck_interruption_code);
+ OFFSET(__LC_MCCK_FAIL_STOR_ADDR, _lowcore, failing_storage_address);
+ OFFSET(__LC_LAST_BREAK, _lowcore, breaking_event_addr);
+ OFFSET(__LC_RST_OLD_PSW, _lowcore, restart_old_psw);
+ OFFSET(__LC_EXT_OLD_PSW, _lowcore, external_old_psw);
+ OFFSET(__LC_SVC_OLD_PSW, _lowcore, svc_old_psw);
+ OFFSET(__LC_PGM_OLD_PSW, _lowcore, program_old_psw);
+ OFFSET(__LC_MCK_OLD_PSW, _lowcore, mcck_old_psw);
+ OFFSET(__LC_IO_OLD_PSW, _lowcore, io_old_psw);
+ OFFSET(__LC_RST_NEW_PSW, _lowcore, restart_psw);
+ OFFSET(__LC_EXT_NEW_PSW, _lowcore, external_new_psw);
+ OFFSET(__LC_SVC_NEW_PSW, _lowcore, svc_new_psw);
+ OFFSET(__LC_PGM_NEW_PSW, _lowcore, program_new_psw);
+ OFFSET(__LC_MCK_NEW_PSW, _lowcore, mcck_new_psw);
+ OFFSET(__LC_IO_NEW_PSW, _lowcore, io_new_psw);
+ /* software defined lowcore locations 0x200 - 0xdff*/
+ OFFSET(__LC_SAVE_AREA_SYNC, _lowcore, save_area_sync);
+ OFFSET(__LC_SAVE_AREA_ASYNC, _lowcore, save_area_async);
+ OFFSET(__LC_SAVE_AREA_RESTART, _lowcore, save_area_restart);
+ OFFSET(__LC_CPU_FLAGS, _lowcore, cpu_flags);
+ OFFSET(__LC_RETURN_PSW, _lowcore, return_psw);
+ OFFSET(__LC_RETURN_MCCK_PSW, _lowcore, return_mcck_psw);
+ OFFSET(__LC_SYNC_ENTER_TIMER, _lowcore, sync_enter_timer);
+ OFFSET(__LC_ASYNC_ENTER_TIMER, _lowcore, async_enter_timer);
+ OFFSET(__LC_MCCK_ENTER_TIMER, _lowcore, mcck_enter_timer);
+ OFFSET(__LC_EXIT_TIMER, _lowcore, exit_timer);
+ OFFSET(__LC_USER_TIMER, _lowcore, user_timer);
+ OFFSET(__LC_SYSTEM_TIMER, _lowcore, system_timer);
+ OFFSET(__LC_STEAL_TIMER, _lowcore, steal_timer);
+ OFFSET(__LC_LAST_UPDATE_TIMER, _lowcore, last_update_timer);
+ OFFSET(__LC_LAST_UPDATE_CLOCK, _lowcore, last_update_clock);
+ OFFSET(__LC_INT_CLOCK, _lowcore, int_clock);
+ OFFSET(__LC_MCCK_CLOCK, _lowcore, mcck_clock);
+ OFFSET(__LC_CURRENT, _lowcore, current_task);
+ OFFSET(__LC_THREAD_INFO, _lowcore, thread_info);
+ OFFSET(__LC_KERNEL_STACK, _lowcore, kernel_stack);
+ OFFSET(__LC_ASYNC_STACK, _lowcore, async_stack);
+ OFFSET(__LC_PANIC_STACK, _lowcore, panic_stack);
+ OFFSET(__LC_RESTART_STACK, _lowcore, restart_stack);
+ OFFSET(__LC_RESTART_FN, _lowcore, restart_fn);
+ OFFSET(__LC_RESTART_DATA, _lowcore, restart_data);
+ OFFSET(__LC_RESTART_SOURCE, _lowcore, restart_source);
+ OFFSET(__LC_USER_ASCE, _lowcore, user_asce);
+ OFFSET(__LC_LPP, _lowcore, lpp);
+ OFFSET(__LC_CURRENT_PID, _lowcore, current_pid);
+ OFFSET(__LC_PERCPU_OFFSET, _lowcore, percpu_offset);
+ OFFSET(__LC_VDSO_PER_CPU, _lowcore, vdso_per_cpu_data);
+ OFFSET(__LC_MACHINE_FLAGS, _lowcore, machine_flags);
+ OFFSET(__LC_GMAP, _lowcore, gmap);
+ OFFSET(__LC_PASTE, _lowcore, paste);
+ /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
+ OFFSET(__LC_DUMP_REIPL, _lowcore, ipib);
+ /* hardware defined lowcore locations 0x1000 - 0x18ff */
+ OFFSET(__LC_VX_SAVE_AREA_ADDR, _lowcore, vector_save_area_addr);
+ OFFSET(__LC_EXT_PARAMS2, _lowcore, ext_params2);
+ OFFSET(SAVE_AREA_BASE, _lowcore, floating_pt_save_area);
+ OFFSET(__LC_FPREGS_SAVE_AREA, _lowcore, floating_pt_save_area);
+ OFFSET(__LC_GPREGS_SAVE_AREA, _lowcore, gpregs_save_area);
+ OFFSET(__LC_PSW_SAVE_AREA, _lowcore, psw_save_area);
+ OFFSET(__LC_PREFIX_SAVE_AREA, _lowcore, prefixreg_save_area);
+ OFFSET(__LC_FP_CREG_SAVE_AREA, _lowcore, fpt_creg_save_area);
+ OFFSET(__LC_TOD_PROGREG_SAVE_AREA, _lowcore, tod_progreg_save_area);
+ OFFSET(__LC_CPU_TIMER_SAVE_AREA, _lowcore, cpu_timer_save_area);
+ OFFSET(__LC_CLOCK_COMP_SAVE_AREA, _lowcore, clock_comp_save_area);
+ OFFSET(__LC_AREGS_SAVE_AREA, _lowcore, access_regs_save_area);
+ OFFSET(__LC_CREGS_SAVE_AREA, _lowcore, cregs_save_area);
+ OFFSET(__LC_PGM_TDB, _lowcore, pgm_tdb);
BLANK();
- DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area));
- DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area));
- DEFINE(__LC_PSW_SAVE_AREA, offsetof(struct _lowcore, psw_save_area));
- DEFINE(__LC_PREFIX_SAVE_AREA, offsetof(struct _lowcore, prefixreg_save_area));
- DEFINE(__LC_AREGS_SAVE_AREA, offsetof(struct _lowcore, access_regs_save_area));
- DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area));
- DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area));
- DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area));
- DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code));
- DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address));
- DEFINE(__LC_VX_SAVE_AREA_ADDR, offsetof(struct _lowcore, vector_save_area_addr));
- DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
- DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
- DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));
- DEFINE(__LC_FP_CREG_SAVE_AREA, offsetof(struct _lowcore, fpt_creg_save_area));
- DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr));
- DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data));
- DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap));
- DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb));
- DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb));
- DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce));
- DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c));
- DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20));
+ /* gmap/sie offsets */
+ OFFSET(__GMAP_ASCE, gmap, asce);
+ OFFSET(__SIE_PROG0C, kvm_s390_sie_block, prog0c);
+ OFFSET(__SIE_PROG20, kvm_s390_sie_block, prog20);
return 0;
}
diff --git a/kernel/arch/s390/kernel/base.S b/kernel/arch/s390/kernel/base.S
index daed3fde4..326f717df 100644
--- a/kernel/arch/s390/kernel/base.S
+++ b/kernel/arch/s390/kernel/base.S
@@ -78,15 +78,20 @@ s390_base_pgm_handler_fn:
#
# Calls diag 308 subcode 1 and continues execution
#
-# The following conditions must be ensured before calling this function:
-# * Prefix register = 0
-# * Lowcore protection is disabled
-#
ENTRY(diag308_reset)
larl %r4,.Lctlregs # Save control registers
stctg %c0,%c15,0(%r4)
+ lg %r2,0(%r4) # Disable lowcore protection
+ nilh %r2,0xefff
+ larl %r4,.Lctlreg0
+ stg %r2,0(%r4)
+ lctlg %c0,%c0,0(%r4)
larl %r4,.Lfpctl # Floating point control register
stfpc 0(%r4)
+ larl %r4,.Lprefix # Save prefix register
+ stpx 0(%r4)
+ larl %r4,.Lprefix_zero # Set prefix register to 0
+ spx 0(%r4)
larl %r4,.Lcontinue_psw # Save PSW flags
epsw %r2,%r3
stm %r2,%r3,0(%r4)
@@ -106,6 +111,8 @@ ENTRY(diag308_reset)
lctlg %c0,%c15,0(%r4)
larl %r4,.Lfpctl # Restore floating point ctl register
lfpc 0(%r4)
+ larl %r4,.Lprefix # Restore prefix register
+ spx 0(%r4)
larl %r4,.Lcontinue_psw # Restore PSW flags
lpswe 0(%r4)
.Lcontinue:
@@ -122,10 +129,16 @@ ENTRY(diag308_reset)
.section .bss
.align 8
+.Lctlreg0:
+ .quad 0
.Lctlregs:
.rept 16
.quad 0
.endr
.Lfpctl:
.long 0
+.Lprefix:
+ .long 0
+.Lprefix_zero:
+ .long 0
.previous
diff --git a/kernel/arch/s390/kernel/compat_signal.c b/kernel/arch/s390/kernel/compat_signal.c
index fe8d6924e..4af60374e 100644
--- a/kernel/arch/s390/kernel/compat_signal.c
+++ b/kernel/arch/s390/kernel/compat_signal.c
@@ -48,6 +48,19 @@ typedef struct
struct ucontext32 uc;
} rt_sigframe32;
+static inline void sigset_to_sigset32(unsigned long *set64,
+ compat_sigset_word *set32)
+{
+ set32[0] = (compat_sigset_word) set64[0];
+ set32[1] = (compat_sigset_word)(set64[0] >> 32);
+}
+
+static inline void sigset32_to_sigset(compat_sigset_word *set32,
+ unsigned long *set64)
+{
+ set64[0] = (unsigned long) set32[0] | ((unsigned long) set32[1] << 32);
+}
+
int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
{
int err;
@@ -153,33 +166,14 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
/* Store registers needed to create the signal frame */
static void store_sigregs(void)
{
- int i;
-
save_access_regs(current->thread.acrs);
- save_fp_ctl(&current->thread.fp_regs.fpc);
- if (current->thread.vxrs) {
- save_vx_regs(current->thread.vxrs);
- for (i = 0; i < __NUM_FPRS; i++)
- current->thread.fp_regs.fprs[i] =
- *(freg_t *)(current->thread.vxrs + i);
- } else
- save_fp_regs(current->thread.fp_regs.fprs);
+ save_fpu_regs();
}
/* Load registers after signal return */
static void load_sigregs(void)
{
- int i;
-
restore_access_regs(current->thread.acrs);
- /* restore_fp_ctl is done in restore_sigregs */
- if (current->thread.vxrs) {
- for (i = 0; i < __NUM_FPRS; i++)
- *(freg_t *)(current->thread.vxrs + i) =
- current->thread.fp_regs.fprs[i];
- restore_vx_regs(current->thread.vxrs);
- } else
- restore_fp_regs(current->thread.fp_regs.fprs);
}
static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
@@ -196,8 +190,7 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
user_sregs.regs.gprs[i] = (__u32) regs->gprs[i];
memcpy(&user_sregs.regs.acrs, current->thread.acrs,
sizeof(user_sregs.regs.acrs));
- memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
- sizeof(user_sregs.fpregs));
+ fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32)))
return -EFAULT;
return 0;
@@ -217,8 +210,8 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI))
return -EINVAL;
- /* Loading the floating-point-control word can fail. Do that first. */
- if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+ /* Test the floating-point-control word. */
+ if (test_fp_ctl(user_sregs.fpregs.fpc))
return -EINVAL;
/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
@@ -235,9 +228,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
sizeof(current->thread.acrs));
-
- memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
- sizeof(current->thread.fp_regs));
+ fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
return 0;
@@ -258,13 +249,13 @@ static int save_sigregs_ext32(struct pt_regs *regs,
return -EFAULT;
/* Save vector registers to signal stack */
- if (current->thread.vxrs) {
+ if (MACHINE_HAS_VX) {
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+ vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
sizeof(sregs_ext->vxrs_low)) ||
__copy_to_user(&sregs_ext->vxrs_high,
- current->thread.vxrs + __NUM_VXRS_LOW,
+ current->thread.fpu.vxrs + __NUM_VXRS_LOW,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
}
@@ -280,21 +271,21 @@ static int restore_sigregs_ext32(struct pt_regs *regs,
/* Restore high gprs from signal stack */
if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high,
- sizeof(&sregs_ext->gprs_high)))
+ sizeof(sregs_ext->gprs_high)))
return -EFAULT;
for (i = 0; i < NUM_GPRS; i++)
*(__u32 *)&regs->gprs[i] = gprs_high[i];
/* Restore vector registers from signal stack */
- if (current->thread.vxrs) {
+ if (MACHINE_HAS_VX) {
if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
sizeof(sregs_ext->vxrs_low)) ||
- __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+ __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
&sregs_ext->vxrs_high,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
for (i = 0; i < __NUM_VXRS_LOW; i++)
- *((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+ *((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
}
return 0;
}
@@ -303,11 +294,14 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
{
struct pt_regs *regs = task_pt_regs(current);
sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15];
+ compat_sigset_t cset;
sigset_t set;
- if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
+ if (__copy_from_user(&cset.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
goto badframe;
+ sigset32_to_sigset(cset.sig, set.sig);
set_current_blocked(&set);
+ save_fpu_regs();
if (restore_sigregs32(regs, &frame->sregs))
goto badframe;
if (restore_sigregs_ext32(regs, &frame->sregs_ext))
@@ -323,13 +317,16 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
{
struct pt_regs *regs = task_pt_regs(current);
rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15];
+ compat_sigset_t cset;
sigset_t set;
- if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+ if (__copy_from_user(&cset, &frame->uc.uc_sigmask, sizeof(cset)))
goto badframe;
+ sigset32_to_sigset(cset.sig, set.sig);
set_current_blocked(&set);
if (compat_restore_altstack(&frame->uc.uc_stack))
goto badframe;
+ save_fpu_regs();
if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
goto badframe;
if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
@@ -397,7 +394,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set,
return -EFAULT;
/* Create struct sigcontext32 on the signal stack */
- memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32);
+ sigset_to_sigset32(set->sig, sc.oldmask);
sc.sregs = (__u32)(unsigned long __force) &frame->sregs;
if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc)))
return -EFAULT;
@@ -458,6 +455,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set,
static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs)
{
+ compat_sigset_t cset;
rt_sigframe32 __user *frame;
unsigned long restorer;
size_t frame_size;
@@ -472,8 +470,7 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
*/
uc_flags = UC_GPRS_HIGH;
if (MACHINE_HAS_VX) {
- if (current->thread.vxrs)
- uc_flags |= UC_VXRS;
+ uc_flags |= UC_VXRS;
} else
frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
sizeof(frame->uc.uc_mcontext_ext.vxrs_high);
@@ -505,11 +502,12 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
store_sigregs();
/* Create ucontext on the signal stack. */
+ sigset_to_sigset32(set->sig, cset.sig);
if (__put_user(uc_flags, &frame->uc.uc_flags) ||
__put_user(0, &frame->uc.uc_link) ||
__compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) ||
save_sigregs32(regs, &frame->uc.uc_mcontext) ||
- __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) ||
+ __copy_to_user(&frame->uc.uc_sigmask, &cset, sizeof(cset)) ||
save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
return -EFAULT;
diff --git a/kernel/arch/s390/kernel/compat_wrapper.c b/kernel/arch/s390/kernel/compat_wrapper.c
index d7fa2f0f1..fac4eedde 100644
--- a/kernel/arch/s390/kernel/compat_wrapper.c
+++ b/kernel/arch/s390/kernel/compat_wrapper.c
@@ -52,15 +52,13 @@
* the regular system call wrappers.
*/
#define COMPAT_SYSCALL_WRAPx(x, name, ...) \
- asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
- asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__));\
- asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__)) \
- { \
- return sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__)); \
- }
+asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
+asmlinkage long notrace compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__));\
+asmlinkage long notrace compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__)) \
+{ \
+ return sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__)); \
+}
-COMPAT_SYSCALL_WRAP1(exit, int, error_code);
-COMPAT_SYSCALL_WRAP1(close, unsigned int, fd);
COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode);
COMPAT_SYSCALL_WRAP2(link, const char __user *, oldname, const char __user *, newname);
COMPAT_SYSCALL_WRAP1(unlink, const char __user *, pathname);
@@ -68,23 +66,16 @@ COMPAT_SYSCALL_WRAP1(chdir, const char __user *, filename);
COMPAT_SYSCALL_WRAP3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev);
COMPAT_SYSCALL_WRAP2(chmod, const char __user *, filename, umode_t, mode);
COMPAT_SYSCALL_WRAP1(oldumount, char __user *, name);
-COMPAT_SYSCALL_WRAP1(alarm, unsigned int, seconds);
COMPAT_SYSCALL_WRAP2(access, const char __user *, filename, int, mode);
-COMPAT_SYSCALL_WRAP1(nice, int, increment);
-COMPAT_SYSCALL_WRAP2(kill, int, pid, int, sig);
COMPAT_SYSCALL_WRAP2(rename, const char __user *, oldname, const char __user *, newname);
COMPAT_SYSCALL_WRAP2(mkdir, const char __user *, pathname, umode_t, mode);
COMPAT_SYSCALL_WRAP1(rmdir, const char __user *, pathname);
-COMPAT_SYSCALL_WRAP1(dup, unsigned int, fildes);
COMPAT_SYSCALL_WRAP1(pipe, int __user *, fildes);
COMPAT_SYSCALL_WRAP1(brk, unsigned long, brk);
COMPAT_SYSCALL_WRAP2(signal, int, sig, __sighandler_t, handler);
COMPAT_SYSCALL_WRAP1(acct, const char __user *, name);
COMPAT_SYSCALL_WRAP2(umount, char __user *, name, int, flags);
-COMPAT_SYSCALL_WRAP2(setpgid, pid_t, pid, pid_t, pgid);
-COMPAT_SYSCALL_WRAP1(umask, int, mask);
COMPAT_SYSCALL_WRAP1(chroot, const char __user *, filename);
-COMPAT_SYSCALL_WRAP2(dup2, unsigned int, oldfd, unsigned int, newfd);
COMPAT_SYSCALL_WRAP3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask);
COMPAT_SYSCALL_WRAP2(sethostname, char __user *, name, int, len);
COMPAT_SYSCALL_WRAP2(symlink, const char __user *, old, const char __user *, new);
@@ -93,37 +84,23 @@ COMPAT_SYSCALL_WRAP1(uselib, const char __user *, library);
COMPAT_SYSCALL_WRAP2(swapon, const char __user *, specialfile, int, swap_flags);
COMPAT_SYSCALL_WRAP4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg);
COMPAT_SYSCALL_WRAP2(munmap, unsigned long, addr, size_t, len);
-COMPAT_SYSCALL_WRAP2(fchmod, unsigned int, fd, umode_t, mode);
-COMPAT_SYSCALL_WRAP2(getpriority, int, which, int, who);
-COMPAT_SYSCALL_WRAP3(setpriority, int, which, int, who, int, niceval);
COMPAT_SYSCALL_WRAP3(syslog, int, type, char __user *, buf, int, len);
COMPAT_SYSCALL_WRAP1(swapoff, const char __user *, specialfile);
-COMPAT_SYSCALL_WRAP1(fsync, unsigned int, fd);
COMPAT_SYSCALL_WRAP2(setdomainname, char __user *, name, int, len);
COMPAT_SYSCALL_WRAP1(newuname, struct new_utsname __user *, name);
COMPAT_SYSCALL_WRAP3(mprotect, unsigned long, start, size_t, len, unsigned long, prot);
COMPAT_SYSCALL_WRAP3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs);
COMPAT_SYSCALL_WRAP2(delete_module, const char __user *, name_user, unsigned int, flags);
COMPAT_SYSCALL_WRAP4(quotactl, unsigned int, cmd, const char __user *, special, qid_t, id, void __user *, addr);
-COMPAT_SYSCALL_WRAP1(getpgid, pid_t, pid);
-COMPAT_SYSCALL_WRAP1(fchdir, unsigned int, fd);
COMPAT_SYSCALL_WRAP2(bdflush, int, func, long, data);
COMPAT_SYSCALL_WRAP3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2);
-COMPAT_SYSCALL_WRAP1(s390_personality, unsigned int, personality);
COMPAT_SYSCALL_WRAP5(llseek, unsigned int, fd, unsigned long, high, unsigned long, low, loff_t __user *, result, unsigned int, whence);
-COMPAT_SYSCALL_WRAP2(flock, unsigned int, fd, unsigned int, cmd);
COMPAT_SYSCALL_WRAP3(msync, unsigned long, start, size_t, len, int, flags);
-COMPAT_SYSCALL_WRAP1(getsid, pid_t, pid);
-COMPAT_SYSCALL_WRAP1(fdatasync, unsigned int, fd);
COMPAT_SYSCALL_WRAP2(mlock, unsigned long, start, size_t, len);
COMPAT_SYSCALL_WRAP2(munlock, unsigned long, start, size_t, len);
-COMPAT_SYSCALL_WRAP1(mlockall, int, flags);
COMPAT_SYSCALL_WRAP2(sched_setparam, pid_t, pid, struct sched_param __user *, param);
COMPAT_SYSCALL_WRAP2(sched_getparam, pid_t, pid, struct sched_param __user *, param);
COMPAT_SYSCALL_WRAP3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param);
-COMPAT_SYSCALL_WRAP1(sched_getscheduler, pid_t, pid);
-COMPAT_SYSCALL_WRAP1(sched_get_priority_max, int, policy);
-COMPAT_SYSCALL_WRAP1(sched_get_priority_min, int, policy);
COMPAT_SYSCALL_WRAP5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr);
COMPAT_SYSCALL_WRAP3(poll, struct pollfd __user *, ufds, unsigned int, nfds, int, timeout);
COMPAT_SYSCALL_WRAP5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5);
@@ -131,20 +108,11 @@ COMPAT_SYSCALL_WRAP2(getcwd, char __user *, buf, unsigned long, size);
COMPAT_SYSCALL_WRAP2(capget, cap_user_header_t, header, cap_user_data_t, dataptr);
COMPAT_SYSCALL_WRAP2(capset, cap_user_header_t, header, const cap_user_data_t, data);
COMPAT_SYSCALL_WRAP3(lchown, const char __user *, filename, uid_t, user, gid_t, group);
-COMPAT_SYSCALL_WRAP2(setreuid, uid_t, ruid, uid_t, euid);
-COMPAT_SYSCALL_WRAP2(setregid, gid_t, rgid, gid_t, egid);
COMPAT_SYSCALL_WRAP2(getgroups, int, gidsetsize, gid_t __user *, grouplist);
COMPAT_SYSCALL_WRAP2(setgroups, int, gidsetsize, gid_t __user *, grouplist);
-COMPAT_SYSCALL_WRAP3(fchown, unsigned int, fd, uid_t, user, gid_t, group);
-COMPAT_SYSCALL_WRAP3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid);
COMPAT_SYSCALL_WRAP3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid);
-COMPAT_SYSCALL_WRAP3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid);
COMPAT_SYSCALL_WRAP3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid);
COMPAT_SYSCALL_WRAP3(chown, const char __user *, filename, uid_t, user, gid_t, group);
-COMPAT_SYSCALL_WRAP1(setuid, uid_t, uid);
-COMPAT_SYSCALL_WRAP1(setgid, gid_t, gid);
-COMPAT_SYSCALL_WRAP1(setfsuid, uid_t, uid);
-COMPAT_SYSCALL_WRAP1(setfsgid, gid_t, gid);
COMPAT_SYSCALL_WRAP2(pivot_root, const char __user *, new_root, const char __user *, put_old);
COMPAT_SYSCALL_WRAP3(mincore, unsigned long, start, size_t, len, unsigned char __user *, vec);
COMPAT_SYSCALL_WRAP3(madvise, unsigned long, start, size_t, len, int, behavior);
@@ -161,23 +129,16 @@ COMPAT_SYSCALL_WRAP3(flistxattr, int, fd, char __user *, list, size_t, size);
COMPAT_SYSCALL_WRAP2(removexattr, const char __user *, path, const char __user *, name);
COMPAT_SYSCALL_WRAP2(lremovexattr, const char __user *, path, const char __user *, name);
COMPAT_SYSCALL_WRAP2(fremovexattr, int, fd, const char __user *, name);
-COMPAT_SYSCALL_WRAP1(exit_group, int, error_code);
COMPAT_SYSCALL_WRAP1(set_tid_address, int __user *, tidptr);
-COMPAT_SYSCALL_WRAP1(epoll_create, int, size);
COMPAT_SYSCALL_WRAP4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event);
COMPAT_SYSCALL_WRAP4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout);
-COMPAT_SYSCALL_WRAP1(timer_getoverrun, timer_t, timer_id);
-COMPAT_SYSCALL_WRAP1(timer_delete, compat_timer_t, compat_timer_id);
COMPAT_SYSCALL_WRAP1(io_destroy, aio_context_t, ctx);
COMPAT_SYSCALL_WRAP3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result);
COMPAT_SYSCALL_WRAP1(mq_unlink, const char __user *, name);
COMPAT_SYSCALL_WRAP5(add_key, const char __user *, tp, const char __user *, dsc, const void __user *, pld, size_t, len, key_serial_t, id);
COMPAT_SYSCALL_WRAP4(request_key, const char __user *, tp, const char __user *, dsc, const char __user *, info, key_serial_t, id);
COMPAT_SYSCALL_WRAP5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long, prot, unsigned long, pgoff, unsigned long, flags);
-COMPAT_SYSCALL_WRAP3(ioprio_set, int, which, int, who, int, ioprio);
-COMPAT_SYSCALL_WRAP2(ioprio_get, int, which, int, who);
COMPAT_SYSCALL_WRAP3(inotify_add_watch, int, fd, const char __user *, path, u32, mask);
-COMPAT_SYSCALL_WRAP2(inotify_rm_watch, int, fd, __s32, wd);
COMPAT_SYSCALL_WRAP3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode);
COMPAT_SYSCALL_WRAP4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned, dev);
COMPAT_SYSCALL_WRAP5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag);
@@ -192,23 +153,11 @@ COMPAT_SYSCALL_WRAP1(unshare, unsigned long, unshare_flags);
COMPAT_SYSCALL_WRAP6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
COMPAT_SYSCALL_WRAP4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags);
COMPAT_SYSCALL_WRAP3(getcpu, unsigned __user *, cpu, unsigned __user *, node, struct getcpu_cache __user *, cache);
-COMPAT_SYSCALL_WRAP1(eventfd, unsigned int, count);
-COMPAT_SYSCALL_WRAP2(timerfd_create, int, clockid, int, flags);
-COMPAT_SYSCALL_WRAP2(eventfd2, unsigned int, count, int, flags);
-COMPAT_SYSCALL_WRAP1(inotify_init1, int, flags);
COMPAT_SYSCALL_WRAP2(pipe2, int __user *, fildes, int, flags);
-COMPAT_SYSCALL_WRAP3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags);
-COMPAT_SYSCALL_WRAP1(epoll_create1, int, flags);
-COMPAT_SYSCALL_WRAP2(tkill, int, pid, int, sig);
-COMPAT_SYSCALL_WRAP3(tgkill, int, tgid, int, pid, int, sig);
COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags);
-COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val);
-COMPAT_SYSCALL_WRAP2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags);
+COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, unsigned long, tls);
COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim);
COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag);
-COMPAT_SYSCALL_WRAP1(syncfs, int, fd);
-COMPAT_SYSCALL_WRAP2(setns, int, fd, int, nstype);
-COMPAT_SYSCALL_WRAP2(s390_runtime_instr, int, command, int, signum);
COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, unsigned long, idx1, unsigned long, idx2);
COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags);
COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags);
@@ -220,3 +169,11 @@ COMPAT_SYSCALL_WRAP2(memfd_create, const char __user *, uname, unsigned int, fla
COMPAT_SYSCALL_WRAP3(bpf, int, cmd, union bpf_attr *, attr, unsigned int, size);
COMPAT_SYSCALL_WRAP3(s390_pci_mmio_write, const unsigned long, mmio_addr, const void __user *, user_buffer, const size_t, length);
COMPAT_SYSCALL_WRAP3(s390_pci_mmio_read, const unsigned long, mmio_addr, void __user *, user_buffer, const size_t, length);
+COMPAT_SYSCALL_WRAP4(socketpair, int, family, int, type, int, protocol, int __user *, usockvec);
+COMPAT_SYSCALL_WRAP3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen);
+COMPAT_SYSCALL_WRAP3(connect, int, fd, struct sockaddr __user *, uservaddr, int, addrlen);
+COMPAT_SYSCALL_WRAP4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, int __user *, upeer_addrlen, int, flags);
+COMPAT_SYSCALL_WRAP3(getsockname, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len);
+COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len);
+COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
+COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
diff --git a/kernel/arch/s390/kernel/cpcmd.c b/kernel/arch/s390/kernel/cpcmd.c
index 199ec92ef..7f768914f 100644
--- a/kernel/arch/s390/kernel/cpcmd.c
+++ b/kernel/arch/s390/kernel/cpcmd.c
@@ -14,6 +14,7 @@
#include <linux/spinlock.h>
#include <linux/stddef.h>
#include <linux/string.h>
+#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/cpcmd.h>
#include <asm/io.h>
@@ -70,6 +71,7 @@ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
memcpy(cpcmd_buf, cmd, cmdlen);
ASCEBC(cpcmd_buf, cmdlen);
+ diag_stat_inc(DIAG_STAT_X008);
if (response) {
memset(response, 0, rlen);
response_len = rlen;
diff --git a/kernel/arch/s390/kernel/crash_dump.c b/kernel/arch/s390/kernel/crash_dump.c
index 49b74454d..171e09bb8 100644
--- a/kernel/arch/s390/kernel/crash_dump.c
+++ b/kernel/arch/s390/kernel/crash_dump.c
@@ -32,43 +32,9 @@ static struct memblock_type oldmem_type = {
.regions = &oldmem_region,
};
-#define for_each_dump_mem_range(i, nid, p_start, p_end, p_nid) \
- for (i = 0, __next_mem_range(&i, nid, &memblock.physmem, \
- &oldmem_type, p_start, \
- p_end, p_nid); \
- i != (u64)ULLONG_MAX; \
- __next_mem_range(&i, nid, &memblock.physmem, \
- &oldmem_type, \
- p_start, p_end, p_nid))
-
struct dump_save_areas dump_save_areas;
/*
- * Allocate and add a save area for a CPU
- */
-struct save_area_ext *dump_save_area_create(int cpu)
-{
- struct save_area_ext **save_areas, *save_area;
-
- save_area = kmalloc(sizeof(*save_area), GFP_KERNEL);
- if (!save_area)
- return NULL;
- if (cpu + 1 > dump_save_areas.count) {
- dump_save_areas.count = cpu + 1;
- save_areas = krealloc(dump_save_areas.areas,
- dump_save_areas.count * sizeof(void *),
- GFP_KERNEL | __GFP_ZERO);
- if (!save_areas) {
- kfree(save_area);
- return NULL;
- }
- dump_save_areas.areas = save_areas;
- }
- dump_save_areas.areas[cpu] = save_area;
- return save_area;
-}
-
-/*
* Return physical address for virtual address
*/
static inline void *load_real_addr(void *addr)
@@ -122,7 +88,7 @@ static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize,
{
int rc;
- if (src < sclp_get_hsa_size()) {
+ if (src < sclp.hsa_size) {
rc = memcpy_hsa(buf, src, csize, userbuf);
} else {
if (userbuf)
@@ -215,7 +181,7 @@ static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma,
unsigned long pfn,
unsigned long size, pgprot_t prot)
{
- unsigned long hsa_end = sclp_get_hsa_size();
+ unsigned long hsa_end = sclp.hsa_size;
unsigned long size_hsa;
if (pfn < hsa_end >> PAGE_SHIFT) {
@@ -258,7 +224,7 @@ int copy_from_oldmem(void *dest, void *src, size_t count)
return rc;
}
} else {
- unsigned long hsa_end = sclp_get_hsa_size();
+ unsigned long hsa_end = sclp.hsa_size;
if ((unsigned long) src < hsa_end) {
copied = min(count, hsa_end - (unsigned long) src);
rc = memcpy_hsa(dest, (unsigned long) src, copied, 0);
@@ -539,7 +505,8 @@ static int get_mem_chunk_cnt(void)
int cnt = 0;
u64 idx;
- for_each_dump_mem_range(idx, NUMA_NO_NODE, NULL, NULL, NULL)
+ for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE,
+ MEMBLOCK_NONE, NULL, NULL, NULL)
cnt++;
return cnt;
}
@@ -552,7 +519,8 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
phys_addr_t start, end;
u64 idx;
- for_each_dump_mem_range(idx, NUMA_NO_NODE, &start, &end, NULL) {
+ for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE,
+ MEMBLOCK_NONE, &start, &end, NULL) {
phdr->p_filesz = end - start;
phdr->p_type = PT_LOAD;
phdr->p_offset = start;
@@ -609,7 +577,7 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
if (elfcorehdr_addr != ELFCORE_ADDR_MAX)
return 0;
/* If we cannot get HSA size for zfcpdump return error */
- if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp_get_hsa_size())
+ if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp.hsa_size)
return -ENODEV;
/* For kdump, exclude previous crashkernel memory */
diff --git a/kernel/arch/s390/kernel/debug.c b/kernel/arch/s390/kernel/debug.c
index c1f21aca7..6fca0e464 100644
--- a/kernel/arch/s390/kernel/debug.c
+++ b/kernel/arch/s390/kernel/debug.c
@@ -1457,23 +1457,24 @@ int
debug_dflt_header_fn(debug_info_t * id, struct debug_view *view,
int area, debug_entry_t * entry, char *out_buf)
{
- struct timespec time_spec;
+ struct timespec64 time_spec;
char *except_str;
unsigned long caller;
int rc = 0;
unsigned int level;
level = entry->id.fields.level;
- stck_to_timespec(entry->id.stck, &time_spec);
+ stck_to_timespec64(entry->id.stck, &time_spec);
if (entry->id.fields.exception)
except_str = "*";
else
except_str = "-";
caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN;
- rc += sprintf(out_buf, "%02i %011lu:%06lu %1u %1s %02i %p ",
- area, time_spec.tv_sec, time_spec.tv_nsec / 1000, level,
- except_str, entry->id.fields.cpuid, (void *) caller);
+ rc += sprintf(out_buf, "%02i %011lld:%06lu %1u %1s %02i %p ",
+ area, (long long)time_spec.tv_sec,
+ time_spec.tv_nsec / 1000, level, except_str,
+ entry->id.fields.cpuid, (void *)caller);
return rc;
}
EXPORT_SYMBOL(debug_dflt_header_fn);
diff --git a/kernel/arch/s390/kernel/diag.c b/kernel/arch/s390/kernel/diag.c
index 2f69243bf..48b37b835 100644
--- a/kernel/arch/s390/kernel/diag.c
+++ b/kernel/arch/s390/kernel/diag.c
@@ -6,12 +6,137 @@
*/
#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
#include <asm/diag.h>
+#include <asm/trace/diag.h>
+
+struct diag_stat {
+ unsigned int counter[NR_DIAG_STAT];
+};
+
+static DEFINE_PER_CPU(struct diag_stat, diag_stat);
+
+struct diag_desc {
+ int code;
+ char *name;
+};
+
+static const struct diag_desc diag_map[NR_DIAG_STAT] = {
+ [DIAG_STAT_X008] = { .code = 0x008, .name = "Console Function" },
+ [DIAG_STAT_X00C] = { .code = 0x00c, .name = "Pseudo Timer" },
+ [DIAG_STAT_X010] = { .code = 0x010, .name = "Release Pages" },
+ [DIAG_STAT_X014] = { .code = 0x014, .name = "Spool File Services" },
+ [DIAG_STAT_X044] = { .code = 0x044, .name = "Voluntary Timeslice End" },
+ [DIAG_STAT_X064] = { .code = 0x064, .name = "NSS Manipulation" },
+ [DIAG_STAT_X09C] = { .code = 0x09c, .name = "Relinquish Timeslice" },
+ [DIAG_STAT_X0DC] = { .code = 0x0dc, .name = "Appldata Control" },
+ [DIAG_STAT_X204] = { .code = 0x204, .name = "Logical-CPU Utilization" },
+ [DIAG_STAT_X210] = { .code = 0x210, .name = "Device Information" },
+ [DIAG_STAT_X224] = { .code = 0x224, .name = "EBCDIC-Name Table" },
+ [DIAG_STAT_X250] = { .code = 0x250, .name = "Block I/O" },
+ [DIAG_STAT_X258] = { .code = 0x258, .name = "Page-Reference Services" },
+ [DIAG_STAT_X288] = { .code = 0x288, .name = "Time Bomb" },
+ [DIAG_STAT_X2C4] = { .code = 0x2c4, .name = "FTP Services" },
+ [DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" },
+ [DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" },
+ [DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" },
+ [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
+};
+
+static int show_diag_stat(struct seq_file *m, void *v)
+{
+ struct diag_stat *stat;
+ unsigned long n = (unsigned long) v - 1;
+ int cpu, prec, tmp;
+
+ get_online_cpus();
+ if (n == 0) {
+ seq_puts(m, " ");
+
+ for_each_online_cpu(cpu) {
+ prec = 10;
+ for (tmp = 10; cpu >= tmp; tmp *= 10)
+ prec--;
+ seq_printf(m, "%*s%d", prec, "CPU", cpu);
+ }
+ seq_putc(m, '\n');
+ } else if (n <= NR_DIAG_STAT) {
+ seq_printf(m, "diag %03x:", diag_map[n-1].code);
+ for_each_online_cpu(cpu) {
+ stat = &per_cpu(diag_stat, cpu);
+ seq_printf(m, " %10u", stat->counter[n-1]);
+ }
+ seq_printf(m, " %s\n", diag_map[n-1].name);
+ }
+ put_online_cpus();
+ return 0;
+}
+
+static void *show_diag_stat_start(struct seq_file *m, loff_t *pos)
+{
+ return *pos <= nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL;
+}
+
+static void *show_diag_stat_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ ++*pos;
+ return show_diag_stat_start(m, pos);
+}
+
+static void show_diag_stat_stop(struct seq_file *m, void *v)
+{
+}
+
+static const struct seq_operations show_diag_stat_sops = {
+ .start = show_diag_stat_start,
+ .next = show_diag_stat_next,
+ .stop = show_diag_stat_stop,
+ .show = show_diag_stat,
+};
+
+static int show_diag_stat_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &show_diag_stat_sops);
+}
+
+static const struct file_operations show_diag_stat_fops = {
+ .open = show_diag_stat_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+
+static int __init show_diag_stat_init(void)
+{
+ debugfs_create_file("diag_stat", 0400, NULL, NULL,
+ &show_diag_stat_fops);
+ return 0;
+}
+
+device_initcall(show_diag_stat_init);
+
+void diag_stat_inc(enum diag_stat_enum nr)
+{
+ this_cpu_inc(diag_stat.counter[nr]);
+ trace_s390_diagnose(diag_map[nr].code);
+}
+EXPORT_SYMBOL(diag_stat_inc);
+
+void diag_stat_inc_norecursion(enum diag_stat_enum nr)
+{
+ this_cpu_inc(diag_stat.counter[nr]);
+ trace_s390_diagnose_norecursion(diag_map[nr].code);
+}
+EXPORT_SYMBOL(diag_stat_inc_norecursion);
/*
* Diagnose 14: Input spool file manipulation
*/
-int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
+static inline int __diag14(unsigned long rx, unsigned long ry1,
+ unsigned long subcode)
{
register unsigned long _ry1 asm("2") = ry1;
register unsigned long _ry2 asm("3") = subcode;
@@ -29,6 +154,12 @@ int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
return rc;
}
+
+int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
+{
+ diag_stat_inc(DIAG_STAT_X014);
+ return __diag14(rx, ry1, subcode);
+}
EXPORT_SYMBOL(diag14);
/*
@@ -48,6 +179,7 @@ int diag210(struct diag210 *addr)
spin_lock_irqsave(&diag210_lock, flags);
diag210_tmp = *addr;
+ diag_stat_inc(DIAG_STAT_X210);
asm volatile(
" lhi %0,-1\n"
" sam31\n"
diff --git a/kernel/arch/s390/kernel/dis.c b/kernel/arch/s390/kernel/dis.c
index 8140d10c6..6e7296160 100644
--- a/kernel/arch/s390/kernel/dis.c
+++ b/kernel/arch/s390/kernel/dis.c
@@ -1920,16 +1920,23 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
}
if (separator)
ptr += sprintf(ptr, "%c", separator);
+ /*
+ * Use four '%' characters below because of the
+ * following two conversions:
+ *
+ * 1) sprintf: %%%%r -> %%r
+ * 2) printk : %%r -> %r
+ */
if (operand->flags & OPERAND_GPR)
- ptr += sprintf(ptr, "%%r%i", value);
+ ptr += sprintf(ptr, "%%%%r%i", value);
else if (operand->flags & OPERAND_FPR)
- ptr += sprintf(ptr, "%%f%i", value);
+ ptr += sprintf(ptr, "%%%%f%i", value);
else if (operand->flags & OPERAND_AR)
- ptr += sprintf(ptr, "%%a%i", value);
+ ptr += sprintf(ptr, "%%%%a%i", value);
else if (operand->flags & OPERAND_CR)
- ptr += sprintf(ptr, "%%c%i", value);
+ ptr += sprintf(ptr, "%%%%c%i", value);
else if (operand->flags & OPERAND_VR)
- ptr += sprintf(ptr, "%%v%i", value);
+ ptr += sprintf(ptr, "%%%%v%i", value);
else if (operand->flags & OPERAND_PCREL)
ptr += sprintf(ptr, "%lx", (signed int) value
+ addr);
diff --git a/kernel/arch/s390/kernel/early.c b/kernel/arch/s390/kernel/early.c
index 549a73a4b..3c31609df 100644
--- a/kernel/arch/s390/kernel/early.c
+++ b/kernel/arch/s390/kernel/early.c
@@ -17,6 +17,7 @@
#include <linux/pfn.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
+#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/ipl.h>
#include <asm/lowcore.h>
@@ -286,6 +287,7 @@ static __init void detect_diag9c(void)
int rc;
cpu_address = stap();
+ diag_stat_inc(DIAG_STAT_X09C);
asm volatile(
" diag %2,0,0x9c\n"
"0: la %0,0\n"
@@ -300,6 +302,7 @@ static __init void detect_diag44(void)
{
int rc;
+ diag_stat_inc(DIAG_STAT_X044);
asm volatile(
" diag 0,0,0x44\n"
"0: la %0,0\n"
@@ -326,9 +329,19 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
if (test_facility(51))
S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
- if (test_facility(129))
+ if (test_facility(129)) {
S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
+ __ctl_set_bit(0, 17);
+ }
+}
+
+static int __init disable_vector_extension(char *str)
+{
+ S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
+ __ctl_clear_bit(0, 17);
+ return 1;
}
+early_param("novx", disable_vector_extension);
static int __init cad_setup(char *str)
{
diff --git a/kernel/arch/s390/kernel/entry.S b/kernel/arch/s390/kernel/entry.S
index 99b44acbf..857b6526d 100644
--- a/kernel/arch/s390/kernel/entry.S
+++ b/kernel/arch/s390/kernel/entry.S
@@ -20,6 +20,9 @@
#include <asm/page.h>
#include <asm/sigp.h>
#include <asm/irq.h>
+#include <asm/vx-insn.h>
+#include <asm/setup.h>
+#include <asm/nmi.h>
__PT_R0 = __PT_GPRS
__PT_R1 = __PT_GPRS + 8
@@ -46,10 +49,10 @@ _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
_TIF_UPROBE)
_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
_TIF_SYSCALL_TRACEPOINT)
-_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE)
+_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE | _CIF_FPU)
_PIF_WORK = (_PIF_PER_TRAP)
-#define BASED(name) name-system_call(%r13)
+#define BASED(name) name-cleanup_critical(%r13)
.macro TRACE_IRQS_ON
#ifdef CONFIG_TRACE_IRQFLAGS
@@ -73,38 +76,6 @@ _PIF_WORK = (_PIF_PER_TRAP)
#endif
.endm
- .macro LPP newpp
-#if IS_ENABLED(CONFIG_KVM)
- tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP
- jz .+8
- .insn s,0xb2800000,\newpp
-#endif
- .endm
-
- .macro HANDLE_SIE_INTERCEPT scratch,reason
-#if IS_ENABLED(CONFIG_KVM)
- tmhh %r8,0x0001 # interrupting from user ?
- jnz .+62
- lgr \scratch,%r9
- slg \scratch,BASED(.Lsie_critical)
- clg \scratch,BASED(.Lsie_critical_length)
- .if \reason==1
- # Some program interrupts are suppressing (e.g. protection).
- # We must also check the instruction after SIE in that case.
- # do_protection_exception will rewind to .Lrewind_pad
- jh .+42
- .else
- jhe .+42
- .endif
- lg %r14,__SF_EMPTY(%r15) # get control block pointer
- LPP __SF_EMPTY+16(%r15) # set host id
- ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
- larl %r9,sie_exit # skip forward to sie_exit
- mvi __SF_EMPTY+31(%r15),\reason # set exit reason
-#endif
- .endm
-
.macro CHECK_STACK stacksize,savearea
#ifdef CONFIG_CHECK_STACK
tml %r15,\stacksize - CONFIG_STACK_GUARD
@@ -113,7 +84,7 @@ _PIF_WORK = (_PIF_PER_TRAP)
#endif
.endm
- .macro SWITCH_ASYNC savearea,stack,shift
+ .macro SWITCH_ASYNC savearea,timer
tmhh %r8,0x0001 # interrupting from user ?
jnz 1f
lgr %r14,%r9
@@ -124,26 +95,28 @@ _PIF_WORK = (_PIF_PER_TRAP)
brasl %r14,cleanup_critical
tmhh %r8,0x0001 # retest problem state after cleanup
jnz 1f
-0: lg %r14,\stack # are we already on the target stack?
+0: lg %r14,__LC_ASYNC_STACK # are we already on the async stack?
slgr %r14,%r15
- srag %r14,%r14,\shift
- jnz 1f
- CHECK_STACK 1<<\shift,\savearea
+ srag %r14,%r14,STACK_SHIFT
+ jnz 2f
+ CHECK_STACK 1<<STACK_SHIFT,\savearea
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- j 2f
-1: lg %r15,\stack # load target stack
-2: la %r11,STACK_FRAME_OVERHEAD(%r15)
+ j 3f
+1: LAST_BREAK %r14
+ UPDATE_VTIME %r14,%r15,\timer
+2: lg %r15,__LC_ASYNC_STACK # load async stack
+3: la %r11,STACK_FRAME_OVERHEAD(%r15)
.endm
- .macro UPDATE_VTIME scratch,enter_timer
- lg \scratch,__LC_EXIT_TIMER
- slg \scratch,\enter_timer
- alg \scratch,__LC_USER_TIMER
- stg \scratch,__LC_USER_TIMER
- lg \scratch,__LC_LAST_UPDATE_TIMER
- slg \scratch,__LC_EXIT_TIMER
- alg \scratch,__LC_SYSTEM_TIMER
- stg \scratch,__LC_SYSTEM_TIMER
+ .macro UPDATE_VTIME w1,w2,enter_timer
+ lg \w1,__LC_EXIT_TIMER
+ lg \w2,__LC_LAST_UPDATE_TIMER
+ slg \w1,\enter_timer
+ slg \w2,__LC_EXIT_TIMER
+ alg \w1,__LC_USER_TIMER
+ alg \w2,__LC_SYSTEM_TIMER
+ stg \w1,__LC_USER_TIMER
+ stg \w2,__LC_SYSTEM_TIMER
mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer
.endm
@@ -167,6 +140,28 @@ _PIF_WORK = (_PIF_PER_TRAP)
#endif
.endm
+ /*
+ * The TSTMSK macro generates a test-under-mask instruction by
+ * calculating the memory offset for the specified mask value.
+ * Mask value can be any constant. The macro shifts the mask
+ * value to calculate the memory offset for the test-under-mask
+ * instruction.
+ */
+ .macro TSTMSK addr, mask, size=8, bytepos=0
+ .if (\bytepos < \size) && (\mask >> 8)
+ .if (\mask & 0xff)
+ .error "Mask exceeds byte boundary"
+ .endif
+ TSTMSK \addr, "(\mask >> 8)", \size, "(\bytepos + 1)"
+ .exitm
+ .endif
+ .ifeq \mask
+ .error "Mask must not be zero"
+ .endif
+ off = \size - \bytepos - 1
+ tm off+\addr, \mask
+ .endm
+
.section .kprobes.text, "ax"
/*
@@ -178,21 +173,84 @@ _PIF_WORK = (_PIF_PER_TRAP)
*/
ENTRY(__switch_to)
stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task
- stg %r15,__THREAD_ksp(%r2) # store kernel stack of prev
- lg %r4,__THREAD_info(%r2) # get thread_info of prev
- lg %r5,__THREAD_info(%r3) # get thread_info of next
+ lgr %r1,%r2
+ aghi %r1,__TASK_thread # thread_struct of prev task
+ lg %r4,__TASK_thread_info(%r2) # get thread_info of prev
+ lg %r5,__TASK_thread_info(%r3) # get thread_info of next
+ stg %r15,__THREAD_ksp(%r1) # store kernel stack of prev
+ lgr %r1,%r3
+ aghi %r1,__TASK_thread # thread_struct of next task
lgr %r15,%r5
aghi %r15,STACK_INIT # end of kernel stack of next
stg %r3,__LC_CURRENT # store task struct of next
stg %r5,__LC_THREAD_INFO # store thread info of next
stg %r15,__LC_KERNEL_STACK # store end of kernel stack
+ lg %r15,__THREAD_ksp(%r1) # load kernel stack of next
lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4
- mvc __LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next
- lg %r15,__THREAD_ksp(%r3) # load kernel stack of next
+ mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
+ TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
+ bzr %r14
+ .insn s,0xb2800000,__LC_LPP # set program parameter
br %r14
.L__critical_start:
+
+#if IS_ENABLED(CONFIG_KVM)
+/*
+ * sie64a calling convention:
+ * %r2 pointer to sie control block
+ * %r3 guest register save area
+ */
+ENTRY(sie64a)
+ stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
+ stg %r2,__SF_EMPTY(%r15) # save control block pointer
+ stg %r3,__SF_EMPTY+8(%r15) # save guest register save area
+ xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # reason code = 0
+ TSTMSK __LC_CPU_FLAGS,_CIF_FPU # load guest fp/vx registers ?
+ jno .Lsie_load_guest_gprs
+ brasl %r14,load_fpu_regs # load guest fp/vx regs
+.Lsie_load_guest_gprs:
+ lmg %r0,%r13,0(%r3) # load guest gprs 0-13
+ lg %r14,__LC_GMAP # get gmap pointer
+ ltgr %r14,%r14
+ jz .Lsie_gmap
+ lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce
+.Lsie_gmap:
+ lg %r14,__SF_EMPTY(%r15) # get control block pointer
+ oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
+ tm __SIE_PROG20+3(%r14),3 # last exit...
+ jnz .Lsie_skip
+ TSTMSK __LC_CPU_FLAGS,_CIF_FPU
+ jo .Lsie_skip # exit if fp/vx regs changed
+ sie 0(%r14)
+.Lsie_skip:
+ ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+.Lsie_done:
+# some program checks are suppressing. C code (e.g. do_protection_exception)
+# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
+# instructions between sie64a and .Lsie_done should not cause program
+# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
+# See also .Lcleanup_sie
+.Lrewind_pad:
+ nop 0
+ .globl sie_exit
+sie_exit:
+ lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
+ stmg %r0,%r13,0(%r14) # save guest gprs 0-13
+ lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
+ lg %r2,__SF_EMPTY+16(%r15) # return exit reason code
+ br %r14
+.Lsie_fault:
+ lghi %r14,-EFAULT
+ stg %r14,__SF_EMPTY+16(%r15) # set exit reason code
+ j sie_exit
+
+ EX_TABLE(.Lrewind_pad,.Lsie_fault)
+ EX_TABLE(sie_exit,.Lsie_fault)
+#endif
+
/*
* SVC interrupt handler routine. System calls are synchronous events and
* are executed with interrupts enabled.
@@ -208,9 +266,9 @@ ENTRY(system_call)
.Lsysc_per:
lg %r15,__LC_KERNEL_STACK
la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
-.Lsysc_vtime:
- UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER
LAST_BREAK %r13
+.Lsysc_vtime:
+ UPDATE_VTIME %r10,%r13,__LC_SYNC_ENTER_TIMER
stmg %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW
@@ -232,7 +290,7 @@ ENTRY(system_call)
stg %r2,__PT_ORIG_GPR2(%r11)
stg %r7,STACK_FRAME_OVERHEAD(%r15)
lgf %r9,0(%r8,%r10) # get system call add.
- tm __TI_flags+7(%r12),_TIF_TRACE
+ TSTMSK __TI_flags(%r12),_TIF_TRACE
jnz .Lsysc_tracesys
basr %r14,%r9 # call sys_xxxx
stg %r2,__PT_R2(%r11) # store return value
@@ -240,13 +298,11 @@ ENTRY(system_call)
.Lsysc_return:
LOCKDEP_SYS_EXIT
.Lsysc_tif:
- tm __PT_PSW+1(%r11),0x01 # returning to user ?
- jno .Lsysc_restore
- tm __PT_FLAGS+7(%r11),_PIF_WORK
+ TSTMSK __PT_FLAGS(%r11),_PIF_WORK
jnz .Lsysc_work
- tm __TI_flags+7(%r12),_TIF_WORK
+ TSTMSK __TI_flags(%r12),_TIF_WORK
jnz .Lsysc_work # check for work
- tm __LC_CPU_FLAGS+7,_CIF_WORK
+ TSTMSK __LC_CPU_FLAGS,_CIF_WORK
jnz .Lsysc_work
.Lsysc_restore:
lg %r14,__LC_VDSO_PER_CPU
@@ -262,21 +318,23 @@ ENTRY(system_call)
# One of the work bits is on. Find out which one.
#
.Lsysc_work:
- tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
+ TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING
jo .Lsysc_mcck_pending
- tm __TI_flags+7(%r12),_TIF_NEED_RESCHED
+ TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED
jo .Lsysc_reschedule
#ifdef CONFIG_UPROBES
- tm __TI_flags+7(%r12),_TIF_UPROBE
+ TSTMSK __TI_flags(%r12),_TIF_UPROBE
jo .Lsysc_uprobe_notify
#endif
- tm __PT_FLAGS+7(%r11),_PIF_PER_TRAP
+ TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP
jo .Lsysc_singlestep
- tm __TI_flags+7(%r12),_TIF_SIGPENDING
+ TSTMSK __TI_flags(%r12),_TIF_SIGPENDING
jo .Lsysc_sigpending
- tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME
+ TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
jo .Lsysc_notify_resume
- tm __LC_CPU_FLAGS+7,_CIF_ASCE
+ TSTMSK __LC_CPU_FLAGS,_CIF_FPU
+ jo .Lsysc_vxrs
+ TSTMSK __LC_CPU_FLAGS,_CIF_ASCE
jo .Lsysc_uaccess
j .Lsysc_return # beware of critical section cleanup
@@ -303,12 +361,19 @@ ENTRY(system_call)
j .Lsysc_return
#
+# CIF_FPU is set, restore floating-point controls and floating-point registers.
+#
+.Lsysc_vxrs:
+ larl %r14,.Lsysc_return
+ jg load_fpu_regs
+
+#
# _TIF_SIGPENDING is set, call do_signal
#
.Lsysc_sigpending:
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,do_signal
- tm __PT_FLAGS+7(%r11),_PIF_SYSCALL
+ TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL
jno .Lsysc_return
lmg %r2,%r7,__PT_R2(%r11) # load svc arguments
lg %r10,__TI_sysc_table(%r12) # address of system call table
@@ -368,7 +433,7 @@ ENTRY(system_call)
basr %r14,%r9 # call sys_xxx
stg %r2,__PT_R2(%r11) # store return value
.Lsysc_tracenogo:
- tm __TI_flags+7(%r12),_TIF_TRACE
+ TSTMSK __TI_flags(%r12),_TIF_TRACE
jz .Lsysc_return
lgr %r2,%r11 # pass pointer to pt_regs
larl %r14,.Lsysc_return
@@ -401,27 +466,35 @@ ENTRY(pgm_check_handler)
stmg %r8,%r15,__LC_SAVE_AREA_SYNC
lg %r10,__LC_LAST_BREAK
lg %r12,__LC_THREAD_INFO
- larl %r13,system_call
+ larl %r13,cleanup_critical
lmg %r8,%r9,__LC_PGM_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,1
tmhh %r8,0x0001 # test problem state bit
- jnz 1f # -> fault in user space
- tmhh %r8,0x4000 # PER bit set in old PSW ?
- jnz 0f # -> enabled, can't be a double fault
+ jnz 2f # -> fault in user space
+#if IS_ENABLED(CONFIG_KVM)
+ # cleanup critical section for sie64a
+ lgr %r14,%r9
+ slg %r14,BASED(.Lsie_critical_start)
+ clg %r14,BASED(.Lsie_critical_length)
+ jhe 0f
+ brasl %r14,.Lcleanup_sie
+#endif
+0: tmhh %r8,0x4000 # PER bit set in old PSW ?
+ jnz 1f # -> enabled, can't be a double fault
tm __LC_PGM_ILC+3,0x80 # check for per exception
jnz .Lpgm_svcper # -> single stepped svc
-0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
+1: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- j 2f
-1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER
- LAST_BREAK %r14
+ j 3f
+2: LAST_BREAK %r14
+ UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
lg %r15,__LC_KERNEL_STACK
lg %r14,__TI_task(%r12)
+ aghi %r14,__TASK_thread # pointer to thread_struct
lghi %r13,__LC_PGM_TDB
tm __LC_PGM_ILC+2,0x02 # check for transaction abort
- jz 2f
+ jz 3f
mvc __THREAD_trap_tdb(256,%r14),0(%r13)
-2: la %r11,STACK_FRAME_OVERHEAD(%r15)
+3: la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
stmg %r8,%r9,__PT_PSW(%r11)
@@ -430,24 +503,28 @@ ENTRY(pgm_check_handler)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
stg %r10,__PT_ARGS(%r11)
tm __LC_PGM_ILC+3,0x80 # check for per exception
- jz 0f
+ jz 4f
tmhh %r8,0x0001 # kernel per event ?
jz .Lpgm_kprobe
oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP
mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE
mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-0: REENABLE_IRQS
+4: REENABLE_IRQS
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
larl %r1,pgm_check_table
llgh %r10,__PT_INT_CODE+2(%r11)
nill %r10,0x007f
sll %r10,2
- je .Lsysc_return
+ je .Lpgm_return
lgf %r1,0(%r10,%r1) # load address of handler routine
lgr %r2,%r11 # pass pointer to pt_regs
basr %r14,%r1 # branch to interrupt-handler
- j .Lsysc_return
+.Lpgm_return:
+ LOCKDEP_SYS_EXIT
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jno .Lsysc_restore
+ j .Lsysc_tif
#
# PER event in supervisor state, must be kprobes
@@ -457,7 +534,7 @@ ENTRY(pgm_check_handler)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,do_per_trap
- j .Lsysc_return
+ j .Lpgm_return
#
# single stepped system call
@@ -478,20 +555,16 @@ ENTRY(io_int_handler)
stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
lg %r10,__LC_LAST_BREAK
lg %r12,__LC_THREAD_INFO
- larl %r13,system_call
+ larl %r13,cleanup_critical
lmg %r8,%r9,__LC_IO_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,2
- SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
- tmhh %r8,0x0001 # interrupting from user?
- jz .Lio_skip
- UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
- LAST_BREAK %r14
-.Lio_skip:
+ SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
stmg %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
stmg %r8,%r9,__PT_PSW(%r11)
mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ
+ jo .Lio_restore
TRACE_IRQS_OFF
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
.Lio_loop:
@@ -502,7 +575,7 @@ ENTRY(io_int_handler)
lghi %r3,THIN_INTERRUPT
.Lio_call:
brasl %r14,do_IRQ
- tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR
+ TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPAR
jz .Lio_return
tpi 0
jz .Lio_return
@@ -512,9 +585,9 @@ ENTRY(io_int_handler)
LOCKDEP_SYS_EXIT
TRACE_IRQS_ON
.Lio_tif:
- tm __TI_flags+7(%r12),_TIF_WORK
+ TSTMSK __TI_flags(%r12),_TIF_WORK
jnz .Lio_work # there is work to do (signals etc.)
- tm __LC_CPU_FLAGS+7,_CIF_WORK
+ TSTMSK __LC_CPU_FLAGS,_CIF_WORK
jnz .Lio_work
.Lio_restore:
lg %r14,__LC_VDSO_PER_CPU
@@ -542,7 +615,7 @@ ENTRY(io_int_handler)
# check for preemptive scheduling
icm %r0,15,__TI_precount(%r12)
jnz .Lio_restore # preemption is disabled
- tm __TI_flags+7(%r12),_TIF_NEED_RESCHED
+ TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED
jno .Lio_restore
# switch to kernel stack
lg %r1,__PT_R15(%r11)
@@ -574,15 +647,17 @@ ENTRY(io_int_handler)
# One of the work bits is on. Find out which one.
#
.Lio_work_tif:
- tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
+ TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING
jo .Lio_mcck_pending
- tm __TI_flags+7(%r12),_TIF_NEED_RESCHED
+ TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED
jo .Lio_reschedule
- tm __TI_flags+7(%r12),_TIF_SIGPENDING
+ TSTMSK __TI_flags(%r12),_TIF_SIGPENDING
jo .Lio_sigpending
- tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME
+ TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
jo .Lio_notify_resume
- tm __LC_CPU_FLAGS+7,_CIF_ASCE
+ TSTMSK __LC_CPU_FLAGS,_CIF_FPU
+ jo .Lio_vxrs
+ TSTMSK __LC_CPU_FLAGS,_CIF_ASCE
jo .Lio_uaccess
j .Lio_return # beware of critical section cleanup
@@ -604,6 +679,13 @@ ENTRY(io_int_handler)
j .Lio_return
#
+# CIF_FPU is set, restore floating-point controls and floating-point registers.
+#
+.Lio_vxrs:
+ larl %r14,.Lio_return
+ jg load_fpu_regs
+
+#
# _TIF_NEED_RESCHED is set, call schedule
#
.Lio_reschedule:
@@ -647,15 +729,9 @@ ENTRY(ext_int_handler)
stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
lg %r10,__LC_LAST_BREAK
lg %r12,__LC_THREAD_INFO
- larl %r13,system_call
+ larl %r13,cleanup_critical
lmg %r8,%r9,__LC_EXT_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,3
- SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
- tmhh %r8,0x0001 # interrupting from user ?
- jz .Lext_skip
- UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
- LAST_BREAK %r14
-.Lext_skip:
+ SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
stmg %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
stmg %r8,%r9,__PT_PSW(%r11)
@@ -664,6 +740,8 @@ ENTRY(ext_int_handler)
mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
mvc __PT_INT_PARM_LONG(8,%r11),0(%r1)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ
+ jo .Lio_restore
TRACE_IRQS_OFF
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
@@ -678,6 +756,14 @@ ENTRY(psw_idle)
stg %r3,__SF_EMPTY(%r15)
larl %r1,.Lpsw_idle_lpsw+4
stg %r1,__SF_EMPTY+8(%r15)
+#ifdef CONFIG_SMP
+ larl %r1,smp_cpu_mtid
+ llgf %r1,0(%r1)
+ ltgr %r1,%r1
+ jz .Lpsw_idle_stcctm
+ .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15)
+.Lpsw_idle_stcctm:
+#endif
STCK __CLOCK_IDLE_ENTER(%r2)
stpt __TIMER_IDLE_ENTER(%r2)
.Lpsw_idle_lpsw:
@@ -685,6 +771,96 @@ ENTRY(psw_idle)
br %r14
.Lpsw_idle_end:
+/*
+ * Store floating-point controls and floating-point or vector register
+ * depending whether the vector facility is available. A critical section
+ * cleanup assures that the registers are stored even if interrupted for
+ * some other work. The CIF_FPU flag is set to trigger a lazy restore
+ * of the register contents at return from io or a system call.
+ */
+ENTRY(save_fpu_regs)
+ lg %r2,__LC_CURRENT
+ aghi %r2,__TASK_thread
+ TSTMSK __LC_CPU_FLAGS,_CIF_FPU
+ bor %r14
+ stfpc __THREAD_FPU_fpc(%r2)
+.Lsave_fpu_regs_fpc_end:
+ lg %r3,__THREAD_FPU_regs(%r2)
+ TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
+ jz .Lsave_fpu_regs_fp # no -> store FP regs
+.Lsave_fpu_regs_vx_low:
+ VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3)
+.Lsave_fpu_regs_vx_high:
+ VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3)
+ j .Lsave_fpu_regs_done # -> set CIF_FPU flag
+.Lsave_fpu_regs_fp:
+ std 0,0(%r3)
+ std 1,8(%r3)
+ std 2,16(%r3)
+ std 3,24(%r3)
+ std 4,32(%r3)
+ std 5,40(%r3)
+ std 6,48(%r3)
+ std 7,56(%r3)
+ std 8,64(%r3)
+ std 9,72(%r3)
+ std 10,80(%r3)
+ std 11,88(%r3)
+ std 12,96(%r3)
+ std 13,104(%r3)
+ std 14,112(%r3)
+ std 15,120(%r3)
+.Lsave_fpu_regs_done:
+ oi __LC_CPU_FLAGS+7,_CIF_FPU
+ br %r14
+.Lsave_fpu_regs_end:
+
+/*
+ * Load floating-point controls and floating-point or vector registers.
+ * A critical section cleanup assures that the register contents are
+ * loaded even if interrupted for some other work.
+ *
+ * There are special calling conventions to fit into sysc and io return work:
+ * %r15: <kernel stack>
+ * The function requires:
+ * %r4
+ */
+load_fpu_regs:
+ lg %r4,__LC_CURRENT
+ aghi %r4,__TASK_thread
+ TSTMSK __LC_CPU_FLAGS,_CIF_FPU
+ bnor %r14
+ lfpc __THREAD_FPU_fpc(%r4)
+ TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
+ lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area
+ jz .Lload_fpu_regs_fp # -> no VX, load FP regs
+.Lload_fpu_regs_vx:
+ VLM %v0,%v15,0,%r4
+.Lload_fpu_regs_vx_high:
+ VLM %v16,%v31,256,%r4
+ j .Lload_fpu_regs_done
+.Lload_fpu_regs_fp:
+ ld 0,0(%r4)
+ ld 1,8(%r4)
+ ld 2,16(%r4)
+ ld 3,24(%r4)
+ ld 4,32(%r4)
+ ld 5,40(%r4)
+ ld 6,48(%r4)
+ ld 7,56(%r4)
+ ld 8,64(%r4)
+ ld 9,72(%r4)
+ ld 10,80(%r4)
+ ld 11,88(%r4)
+ ld 12,96(%r4)
+ ld 13,104(%r4)
+ ld 14,112(%r4)
+ ld 15,120(%r4)
+.Lload_fpu_regs_done:
+ ni __LC_CPU_FLAGS+7,255-_CIF_FPU
+ br %r14
+.Lload_fpu_regs_end:
+
.L__critical_end:
/*
@@ -697,14 +873,13 @@ ENTRY(mcck_int_handler)
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
lg %r10,__LC_LAST_BREAK
lg %r12,__LC_THREAD_INFO
- larl %r13,system_call
+ larl %r13,cleanup_critical
lmg %r8,%r9,__LC_MCK_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,4
- tm __LC_MCCK_CODE,0x80 # system damage?
+ TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE
jo .Lmcck_panic # yes -> rest of mcck code invalid
lghi %r14,__LC_CPU_TIMER_SAVE_AREA
mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
- tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid?
+ TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID
jo 3f
la %r14,__LC_SYNC_ENTER_TIMER
clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER
@@ -718,13 +893,9 @@ ENTRY(mcck_int_handler)
la %r14,__LC_LAST_UPDATE_TIMER
2: spt 0(%r14)
mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
-3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid?
+3: TSTMSK __LC_MCCK_CODE,(MCCK_CODE_PSW_MWP_VALID|MCCK_CODE_PSW_IA_VALID)
jno .Lmcck_panic # no -> skip cleanup critical
- SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT
- tm %r8,0x0001 # interrupting from user ?
- jz .Lmcck_skip
- UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER
- LAST_BREAK %r14
+ SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
.Lmcck_skip:
lghi %r14,__LC_GPREGS_SAVE_AREA+64
stmg %r0,%r7,__PT_R0(%r11)
@@ -742,7 +913,7 @@ ENTRY(mcck_int_handler)
la %r11,STACK_FRAME_OVERHEAD(%r1)
lgr %r15,%r1
ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
- tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
+ TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING
jno .Lmcck_return
TRACE_IRQS_OFF
brasl %r14,s390_handle_mcck
@@ -759,19 +930,18 @@ ENTRY(mcck_int_handler)
lpswe __LC_RETURN_MCCK_PSW
.Lmcck_panic:
- lg %r14,__LC_PANIC_STACK
- slgr %r14,%r15
- srag %r14,%r14,PAGE_SHIFT
- jz 0f
lg %r15,__LC_PANIC_STACK
-0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j .Lmcck_skip
#
# PSW restart interrupt handler
#
ENTRY(restart_int_handler)
- stg %r15,__LC_SAVE_AREA_RESTART
+ TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
+ jz 0f
+ .insn s,0xb2800000,__LC_LPP
+0: stg %r15,__LC_SAVE_AREA_RESTART
lg %r15,__LC_RESTART_STACK
aghi %r15,-__PT_SIZE # create pt_regs on stack
xc 0(__PT_SIZE,%r15),0(%r15)
@@ -814,20 +984,13 @@ stack_overflow:
jg kernel_stack_overflow
#endif
- .align 8
-.Lcleanup_table:
- .quad system_call
- .quad .Lsysc_do_svc
- .quad .Lsysc_tif
- .quad .Lsysc_restore
- .quad .Lsysc_done
- .quad .Lio_tif
- .quad .Lio_restore
- .quad .Lio_done
- .quad psw_idle
- .quad .Lpsw_idle_end
-
cleanup_critical:
+#if IS_ENABLED(CONFIG_KVM)
+ clg %r9,BASED(.Lcleanup_table_sie) # .Lsie_gmap
+ jl 0f
+ clg %r9,BASED(.Lcleanup_table_sie+8)# .Lsie_done
+ jl .Lcleanup_sie
+#endif
clg %r9,BASED(.Lcleanup_table) # system_call
jl 0f
clg %r9,BASED(.Lcleanup_table+8) # .Lsysc_do_svc
@@ -848,8 +1011,45 @@ cleanup_critical:
jl 0f
clg %r9,BASED(.Lcleanup_table+72) # .Lpsw_idle_end
jl .Lcleanup_idle
+ clg %r9,BASED(.Lcleanup_table+80) # save_fpu_regs
+ jl 0f
+ clg %r9,BASED(.Lcleanup_table+88) # .Lsave_fpu_regs_end
+ jl .Lcleanup_save_fpu_regs
+ clg %r9,BASED(.Lcleanup_table+96) # load_fpu_regs
+ jl 0f
+ clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end
+ jl .Lcleanup_load_fpu_regs
0: br %r14
+ .align 8
+.Lcleanup_table:
+ .quad system_call
+ .quad .Lsysc_do_svc
+ .quad .Lsysc_tif
+ .quad .Lsysc_restore
+ .quad .Lsysc_done
+ .quad .Lio_tif
+ .quad .Lio_restore
+ .quad .Lio_done
+ .quad psw_idle
+ .quad .Lpsw_idle_end
+ .quad save_fpu_regs
+ .quad .Lsave_fpu_regs_end
+ .quad load_fpu_regs
+ .quad .Lload_fpu_regs_end
+
+#if IS_ENABLED(CONFIG_KVM)
+.Lcleanup_table_sie:
+ .quad .Lsie_gmap
+ .quad .Lsie_done
+
+.Lcleanup_sie:
+ lg %r9,__SF_EMPTY(%r15) # get control block pointer
+ ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ larl %r9,sie_exit # skip forward to sie_exit
+ br %r14
+#endif
.Lcleanup_system_call:
# check if stpt has been executed
@@ -910,7 +1110,7 @@ cleanup_critical:
.quad system_call
.quad .Lsysc_stmg
.quad .Lsysc_per
- .quad .Lsysc_vtime+18
+ .quad .Lsysc_vtime+36
.quad .Lsysc_vtime+42
.Lcleanup_sysc_tif:
@@ -958,7 +1158,27 @@ cleanup_critical:
jhe 1f
mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2)
-1: # account system time going idle
+1: # calculate idle cycles
+#ifdef CONFIG_SMP
+ clg %r9,BASED(.Lcleanup_idle_insn)
+ jl 3f
+ larl %r1,smp_cpu_mtid
+ llgf %r1,0(%r1)
+ ltgr %r1,%r1
+ jz 3f
+ .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15)
+ larl %r3,mt_cycles
+ ag %r3,__LC_PERCPU_OFFSET
+ la %r4,__SF_EMPTY+16(%r15)
+2: lg %r0,0(%r3)
+ slg %r0,0(%r4)
+ alg %r0,64(%r4)
+ stg %r0,0(%r3)
+ la %r3,8(%r3)
+ la %r4,8(%r4)
+ brct %r1,2b
+#endif
+3: # account system time going idle
lg %r9,__LC_STEAL_TIMER
alg %r9,__CLOCK_IDLE_ENTER(%r2)
slg %r9,__LC_LAST_UPDATE_CLOCK
@@ -976,6 +1196,116 @@ cleanup_critical:
.Lcleanup_idle_insn:
.quad .Lpsw_idle_lpsw
+.Lcleanup_save_fpu_regs:
+ TSTMSK __LC_CPU_FLAGS,_CIF_FPU
+ bor %r14
+ clg %r9,BASED(.Lcleanup_save_fpu_regs_done)
+ jhe 5f
+ clg %r9,BASED(.Lcleanup_save_fpu_regs_fp)
+ jhe 4f
+ clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_high)
+ jhe 3f
+ clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_low)
+ jhe 2f
+ clg %r9,BASED(.Lcleanup_save_fpu_fpc_end)
+ jhe 1f
+ lg %r2,__LC_CURRENT
+ aghi %r2,__TASK_thread
+0: # Store floating-point controls
+ stfpc __THREAD_FPU_fpc(%r2)
+1: # Load register save area and check if VX is active
+ lg %r3,__THREAD_FPU_regs(%r2)
+ TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
+ jz 4f # no VX -> store FP regs
+2: # Store vector registers (V0-V15)
+ VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3)
+3: # Store vector registers (V16-V31)
+ VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3)
+ j 5f # -> done, set CIF_FPU flag
+4: # Store floating-point registers
+ std 0,0(%r3)
+ std 1,8(%r3)
+ std 2,16(%r3)
+ std 3,24(%r3)
+ std 4,32(%r3)
+ std 5,40(%r3)
+ std 6,48(%r3)
+ std 7,56(%r3)
+ std 8,64(%r3)
+ std 9,72(%r3)
+ std 10,80(%r3)
+ std 11,88(%r3)
+ std 12,96(%r3)
+ std 13,104(%r3)
+ std 14,112(%r3)
+ std 15,120(%r3)
+5: # Set CIF_FPU flag
+ oi __LC_CPU_FLAGS+7,_CIF_FPU
+ lg %r9,48(%r11) # return from save_fpu_regs
+ br %r14
+.Lcleanup_save_fpu_fpc_end:
+ .quad .Lsave_fpu_regs_fpc_end
+.Lcleanup_save_fpu_regs_vx_low:
+ .quad .Lsave_fpu_regs_vx_low
+.Lcleanup_save_fpu_regs_vx_high:
+ .quad .Lsave_fpu_regs_vx_high
+.Lcleanup_save_fpu_regs_fp:
+ .quad .Lsave_fpu_regs_fp
+.Lcleanup_save_fpu_regs_done:
+ .quad .Lsave_fpu_regs_done
+
+.Lcleanup_load_fpu_regs:
+ TSTMSK __LC_CPU_FLAGS,_CIF_FPU
+ bnor %r14
+ clg %r9,BASED(.Lcleanup_load_fpu_regs_done)
+ jhe 1f
+ clg %r9,BASED(.Lcleanup_load_fpu_regs_fp)
+ jhe 2f
+ clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_high)
+ jhe 3f
+ clg %r9,BASED(.Lcleanup_load_fpu_regs_vx)
+ jhe 4f
+ lg %r4,__LC_CURRENT
+ aghi %r4,__TASK_thread
+ lfpc __THREAD_FPU_fpc(%r4)
+ TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
+ lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area
+ jz 2f # -> no VX, load FP regs
+4: # Load V0 ..V15 registers
+ VLM %v0,%v15,0,%r4
+3: # Load V16..V31 registers
+ VLM %v16,%v31,256,%r4
+ j 1f
+2: # Load floating-point registers
+ ld 0,0(%r4)
+ ld 1,8(%r4)
+ ld 2,16(%r4)
+ ld 3,24(%r4)
+ ld 4,32(%r4)
+ ld 5,40(%r4)
+ ld 6,48(%r4)
+ ld 7,56(%r4)
+ ld 8,64(%r4)
+ ld 9,72(%r4)
+ ld 10,80(%r4)
+ ld 11,88(%r4)
+ ld 12,96(%r4)
+ ld 13,104(%r4)
+ ld 14,112(%r4)
+ ld 15,120(%r4)
+1: # Clear CIF_FPU bit
+ ni __LC_CPU_FLAGS+7,255-_CIF_FPU
+ lg %r9,48(%r11) # return from load_fpu_regs
+ br %r14
+.Lcleanup_load_fpu_regs_vx:
+ .quad .Lload_fpu_regs_vx
+.Lcleanup_load_fpu_regs_vx_high:
+ .quad .Lload_fpu_regs_vx_high
+.Lcleanup_load_fpu_regs_fp:
+ .quad .Lload_fpu_regs_fp
+.Lcleanup_load_fpu_regs_done:
+ .quad .Lload_fpu_regs_done
+
/*
* Integer constants
*/
@@ -984,62 +1314,11 @@ cleanup_critical:
.quad .L__critical_start
.Lcritical_length:
.quad .L__critical_end - .L__critical_start
-
-
#if IS_ENABLED(CONFIG_KVM)
-/*
- * sie64a calling convention:
- * %r2 pointer to sie control block
- * %r3 guest register save area
- */
-ENTRY(sie64a)
- stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
- stg %r2,__SF_EMPTY(%r15) # save control block pointer
- stg %r3,__SF_EMPTY+8(%r15) # save guest register save area
- xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
- lmg %r0,%r13,0(%r3) # load guest gprs 0-13
- lg %r14,__LC_GMAP # get gmap pointer
- ltgr %r14,%r14
- jz .Lsie_gmap
- lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce
-.Lsie_gmap:
- lg %r14,__SF_EMPTY(%r15) # get control block pointer
- oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
- tm __SIE_PROG20+3(%r14),1 # last exit...
- jnz .Lsie_done
- LPP __SF_EMPTY(%r15) # set guest id
- sie 0(%r14)
-.Lsie_done:
- LPP __SF_EMPTY+16(%r15) # set host id
- ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
-# some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions between sie64a and .Lsie_done should not cause program
-# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
-# See also HANDLE_SIE_INTERCEPT
-.Lrewind_pad:
- nop 0
- .globl sie_exit
-sie_exit:
- lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
- stmg %r0,%r13,0(%r14) # save guest gprs 0-13
- lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
- lg %r2,__SF_EMPTY+24(%r15) # return exit reason code
- br %r14
-.Lsie_fault:
- lghi %r14,-EFAULT
- stg %r14,__SF_EMPTY+24(%r15) # set exit reason code
- j sie_exit
-
- .align 8
-.Lsie_critical:
+.Lsie_critical_start:
.quad .Lsie_gmap
.Lsie_critical_length:
.quad .Lsie_done - .Lsie_gmap
-
- EX_TABLE(.Lrewind_pad,.Lsie_fault)
- EX_TABLE(sie_exit,.Lsie_fault)
#endif
.section .rodata, "a"
diff --git a/kernel/arch/s390/kernel/entry.h b/kernel/arch/s390/kernel/entry.h
index 834df047d..b7019ab74 100644
--- a/kernel/arch/s390/kernel/entry.h
+++ b/kernel/arch/s390/kernel/entry.h
@@ -16,13 +16,10 @@ void io_int_handler(void);
void mcck_int_handler(void);
void restart_int_handler(void);
void restart_call_handler(void);
-void psw_idle(struct s390_idle_data *, unsigned long);
asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
-int alloc_vector_registers(struct task_struct *tsk);
-
void do_protection_exception(struct pt_regs *regs);
void do_dat_exception(struct pt_regs *regs);
diff --git a/kernel/arch/s390/kernel/head.S b/kernel/arch/s390/kernel/head.S
index 59b7c6470..301ee9c70 100644
--- a/kernel/arch/s390/kernel/head.S
+++ b/kernel/arch/s390/kernel/head.S
@@ -26,6 +26,7 @@
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/page.h>
+#include <asm/ptrace.h>
#define ARCH_OFFSET 4
@@ -59,19 +60,6 @@ __HEAD
.long 0x020006e0,0x20000050
.org 0x200
-#
-# subroutine to set architecture mode
-#
-.Lsetmode:
- mvi __LC_AR_MODE_ID,1 # set esame flag
- slr %r0,%r0 # set cpuid to zero
- lhi %r1,2 # mode 2 = esame (dump)
- sigp %r1,%r0,0x12 # switch to esame mode
- bras %r13,0f
- .fill 16,4,0x0
-0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
- sam31 # switch to 31 bit addressing mode
- br %r14
#
# subroutine to wait for end I/O
@@ -159,7 +147,14 @@ __HEAD
.long 0x02200050,0x00000000
iplstart:
- bas %r14,.Lsetmode # Immediately switch to 64 bit mode
+ mvi __LC_AR_MODE_ID,1 # set esame flag
+ slr %r0,%r0 # set cpuid to zero
+ lhi %r1,2 # mode 2 = esame (dump)
+ sigp %r1,%r0,0x12 # switch to esame mode
+ bras %r13,0f
+ .fill 16,4,0x0
+0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
+ sam31 # switch to 31 bit addressing mode
lh %r1,0xb8 # test if subchannel number
bct %r1,.Lnoload # is valid
l %r1,0xb8 # load ipl subchannel number
@@ -269,71 +264,6 @@ iplstart:
.Lcpuid:.fill 8,1,0
#
-# SALIPL loader support. Based on a patch by Rob van der Heij.
-# This entry point is called directly from the SALIPL loader and
-# doesn't need a builtin ipl record.
-#
- .org 0x800
-ENTRY(start)
- stm %r0,%r15,0x07b0 # store registers
- bas %r14,.Lsetmode # Immediately switch to 64 bit mode
- basr %r12,%r0
-.base:
- l %r11,.parm
- l %r8,.cmd # pointer to command buffer
-
- ltr %r9,%r9 # do we have SALIPL parameters?
- bp .sk8x8
-
- mvc 0(64,%r8),0x00b0 # copy saved registers
- xc 64(240-64,%r8),0(%r8) # remainder of buffer
- tr 0(64,%r8),.lowcase
- b .gotr
-.sk8x8:
- mvc 0(240,%r8),0(%r9) # copy iplparms into buffer
-.gotr:
- slr %r0,%r0
- st %r0,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r11)
- st %r0,INITRD_START+ARCH_OFFSET-PARMAREA(%r11)
- j startup # continue with startup
-.cmd: .long COMMAND_LINE # address of command line buffer
-.parm: .long PARMAREA
-.lowcase:
- .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
- .byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
- .byte 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17
- .byte 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f
- .byte 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27
- .byte 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f
- .byte 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37
- .byte 0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f
- .byte 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47
- .byte 0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f
- .byte 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57
- .byte 0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f
- .byte 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67
- .byte 0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f
- .byte 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77
- .byte 0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f
-
- .byte 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87
- .byte 0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f
- .byte 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97
- .byte 0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f
- .byte 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7
- .byte 0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf
- .byte 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7
- .byte 0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf
- .byte 0xc0,0x81,0x82,0x83,0x84,0x85,0x86,0x87 # .abcdefg
- .byte 0x88,0x89,0xca,0xcb,0xcc,0xcd,0xce,0xcf # hi
- .byte 0xd0,0x91,0x92,0x93,0x94,0x95,0x96,0x97 # .jklmnop
- .byte 0x98,0x99,0xda,0xdb,0xdc,0xdd,0xde,0xdf # qr
- .byte 0xe0,0xe1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7 # ..stuvwx
- .byte 0xa8,0xa9,0xea,0xeb,0xec,0xed,0xee,0xef # yz
- .byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7
- .byte 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
-
-#
# startup-code at 0x10000, running in absolute addressing mode
# this is called either by the ipl loader or directly by PSW restart
# or linload or SALIPL
@@ -364,12 +294,13 @@ ENTRY(startup_kdump)
bras %r13,0f
.fill 16,4,0x0
0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
- sam31 # switch to 31 bit addressing mode
+ sam64 # switch to 64 bit addressing mode
basr %r13,0 # get base
.LPG0:
xc 0x200(256),0x200 # partially clear lowcore
xc 0x300(256),0x300
xc 0xe00(256),0xe00
+ lctlg %c0,%c15,0x200(%r0) # initialize control registers
stck __LC_LAST_UPDATE_CLOCK
spt 6f-.LPG0(%r13)
mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
@@ -394,7 +325,7 @@ ENTRY(startup_kdump)
jnz 1b
j 4f
2: l %r15,.Lstack-.LPG0(%r13)
- ahi %r15,-96
+ ahi %r15,-STACK_FRAME_OVERHEAD
la %r2,.Lals_string-.LPG0(%r13)
l %r3,.Lsclp_print-.LPG0(%r13)
basr %r14,%r3
@@ -413,9 +344,9 @@ ENTRY(startup_kdump)
# followed by the facility words.
#if defined(CONFIG_MARCH_Z13)
- .long 3, 0xc100eff2, 0xf46ce800, 0x00400000
+ .long 2, 0xc100eff2, 0xf46cc800
#elif defined(CONFIG_MARCH_ZEC12)
- .long 3, 0xc100eff2, 0xf46ce800, 0x00400000
+ .long 2, 0xc100eff2, 0xf46cc800
#elif defined(CONFIG_MARCH_Z196)
.long 2, 0xc100eff2, 0xf46c0000
#elif defined(CONFIG_MARCH_Z10)
@@ -428,8 +359,7 @@ ENTRY(startup_kdump)
.long 1, 0xc0000000
#endif
4:
- /* Continue with 64bit startup code in head64.S */
- sam64 # switch to 64 bit mode
+ /* Continue with startup code in head64.S */
jg startup_continue
.align 8
diff --git a/kernel/arch/s390/kernel/head64.S b/kernel/arch/s390/kernel/head64.S
index d7c005075..58b719fa8 100644
--- a/kernel/arch/s390/kernel/head64.S
+++ b/kernel/arch/s390/kernel/head64.S
@@ -16,7 +16,12 @@
__HEAD
ENTRY(startup_continue)
- larl %r1,sched_clock_base_cc
+ tm __LC_STFL_FAC_LIST+6,0x80 # LPP available ?
+ jz 0f
+ xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid
+ mvi __LC_LPP,0x80 # and set LPP_MAGIC
+ .insn s,0xb2800000,__LC_LPP # load program parameter
+0: larl %r1,sched_clock_base_cc
mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK
larl %r13,.LPG1 # get base
lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers
diff --git a/kernel/arch/s390/kernel/ipl.c b/kernel/arch/s390/kernel/ipl.c
index 52fbef91d..b1f0a90f9 100644
--- a/kernel/arch/s390/kernel/ipl.c
+++ b/kernel/arch/s390/kernel/ipl.c
@@ -17,6 +17,7 @@
#include <linux/gfp.h>
#include <linux/crash_dump.h>
#include <linux/debug_locks.h>
+#include <asm/diag.h>
#include <asm/ipl.h>
#include <asm/smp.h>
#include <asm/setup.h>
@@ -120,6 +121,7 @@ static char *dump_type_str(enum dump_type type)
* Must be in data section since the bss section
* is not cleared when these are accessed.
*/
+static u8 ipl_ssid __attribute__((__section__(".data"))) = 0;
static u16 ipl_devno __attribute__((__section__(".data"))) = 0;
u32 ipl_flags __attribute__((__section__(".data"))) = 0;
@@ -165,7 +167,7 @@ static struct ipl_parameter_block *dump_block_ccw;
static struct sclp_ipl_info sclp_ipl_info;
-int diag308(unsigned long subcode, void *addr)
+static inline int __diag308(unsigned long subcode, void *addr)
{
register unsigned long _addr asm("0") = (unsigned long) addr;
register unsigned long _rc asm("1") = 0;
@@ -178,6 +180,12 @@ int diag308(unsigned long subcode, void *addr)
: "d" (subcode) : "cc", "memory");
return _rc;
}
+
+int diag308(unsigned long subcode, void *addr)
+{
+ diag_stat_inc(DIAG_STAT_X308);
+ return __diag308(subcode, addr);
+}
EXPORT_SYMBOL_GPL(diag308);
/* SYSFS */
@@ -190,6 +198,33 @@ static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \
return snprintf(page, PAGE_SIZE, _format, ##args); \
}
+#define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ unsigned long long ssid, devno; \
+ \
+ if (sscanf(buf, "0.%llx.%llx\n", &ssid, &devno) != 2) \
+ return -EINVAL; \
+ \
+ if (ssid > __MAX_SSID || devno > __MAX_SUBCHANNEL) \
+ return -EINVAL; \
+ \
+ _ipl_blk.ssid = ssid; \
+ _ipl_blk.devno = devno; \
+ return len; \
+}
+
+#define DEFINE_IPL_CCW_ATTR_RW(_prefix, _name, _ipl_blk) \
+IPL_ATTR_SHOW_FN(_prefix, _name, "0.%x.%04x\n", \
+ _ipl_blk.ssid, _ipl_blk.devno); \
+IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk); \
+static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
+ __ATTR(_name, (S_IRUGO | S_IWUSR), \
+ sys_##_prefix##_##_name##_show, \
+ sys_##_prefix##_##_name##_store) \
+
#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value) \
IPL_ATTR_SHOW_FN(_prefix, _name, _format, _value) \
static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
@@ -388,7 +423,7 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj,
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- return sprintf(page, "0.0.%04x\n", ipl_devno);
+ return sprintf(page, "0.%x.%04x\n", ipl_ssid, ipl_devno);
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
return sprintf(page, "0.0.%04x\n", ipl->ipl_info.fcp.devno);
@@ -680,21 +715,14 @@ static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr,
char *buf, loff_t off, size_t count)
{
+ size_t scpdata_len = count;
size_t padding;
- size_t scpdata_len;
-
- if (off < 0)
- return -EINVAL;
- if (off >= DIAG308_SCPDATA_SIZE)
- return -ENOSPC;
- if (count > DIAG308_SCPDATA_SIZE - off)
- count = DIAG308_SCPDATA_SIZE - off;
-
- memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf + off, count);
- scpdata_len = off + count;
+ if (off)
+ return -EINVAL;
+ memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf, count);
if (scpdata_len % 8) {
padding = 8 - (scpdata_len % 8);
memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len,
@@ -710,7 +738,7 @@ static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
}
static struct bin_attribute sys_reipl_fcp_scp_data_attr =
__BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_fcp_scpdata_read,
- reipl_fcp_scpdata_write, PAGE_SIZE);
+ reipl_fcp_scpdata_write, DIAG308_SCPDATA_SIZE);
static struct bin_attribute *reipl_fcp_bin_attrs[] = {
&sys_reipl_fcp_scp_data_attr,
@@ -807,9 +835,7 @@ static struct attribute_group reipl_fcp_attr_group = {
};
/* CCW reipl device attributes */
-
-DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
- reipl_block_ccw->ipl_info.ccw.devno);
+DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ipl_info.ccw);
/* NSS wrapper */
static ssize_t reipl_nss_loadparm_show(struct kobject *kobj,
@@ -1049,8 +1075,8 @@ static void __reipl_run(void *unused)
switch (reipl_method) {
case REIPL_METHOD_CCW_CIO:
+ devid.ssid = reipl_block_ccw->ipl_info.ccw.ssid;
devid.devno = reipl_block_ccw->ipl_info.ccw.devno;
- devid.ssid = 0;
reipl_ccw_dev(&devid);
break;
case REIPL_METHOD_CCW_VM:
@@ -1185,6 +1211,7 @@ static int __init reipl_ccw_init(void)
reipl_block_ccw_init(reipl_block_ccw);
if (ipl_info.type == IPL_TYPE_CCW) {
+ reipl_block_ccw->ipl_info.ccw.ssid = ipl_ssid;
reipl_block_ccw->ipl_info.ccw.devno = ipl_devno;
reipl_block_ccw_fill_parms(reipl_block_ccw);
}
@@ -1329,9 +1356,7 @@ static struct attribute_group dump_fcp_attr_group = {
};
/* CCW dump device attributes */
-
-DEFINE_IPL_ATTR_RW(dump_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
- dump_block_ccw->ipl_info.ccw.devno);
+DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ipl_info.ccw);
static struct attribute *dump_ccw_attrs[] = {
&sys_dump_ccw_device_attr.attr,
@@ -1411,8 +1436,8 @@ static void __dump_run(void *unused)
switch (dump_method) {
case DUMP_METHOD_CCW_CIO:
+ devid.ssid = dump_block_ccw->ipl_info.ccw.ssid;
devid.devno = dump_block_ccw->ipl_info.ccw.devno;
- devid.ssid = 0;
reipl_ccw_dev(&devid);
break;
case DUMP_METHOD_CCW_VM:
@@ -1932,14 +1957,14 @@ void __init setup_ipl(void)
ipl_info.type = get_ipl_type();
switch (ipl_info.type) {
case IPL_TYPE_CCW:
+ ipl_info.data.ccw.dev_id.ssid = ipl_ssid;
ipl_info.data.ccw.dev_id.devno = ipl_devno;
- ipl_info.data.ccw.dev_id.ssid = 0;
break;
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
+ ipl_info.data.fcp.dev_id.ssid = 0;
ipl_info.data.fcp.dev_id.devno =
IPL_PARMBLOCK_START->ipl_info.fcp.devno;
- ipl_info.data.fcp.dev_id.ssid = 0;
ipl_info.data.fcp.wwpn = IPL_PARMBLOCK_START->ipl_info.fcp.wwpn;
ipl_info.data.fcp.lun = IPL_PARMBLOCK_START->ipl_info.fcp.lun;
break;
@@ -1971,6 +1996,7 @@ void __init ipl_save_parameters(void)
if (cio_get_iplinfo(&iplinfo))
return;
+ ipl_ssid = iplinfo.ssid;
ipl_devno = iplinfo.devno;
ipl_flags |= IPL_DEVNO_VALID;
if (!iplinfo.is_qdio)
diff --git a/kernel/arch/s390/kernel/irq.c b/kernel/arch/s390/kernel/irq.c
index e9d9addfa..f41d5208a 100644
--- a/kernel/arch/s390/kernel/irq.c
+++ b/kernel/arch/s390/kernel/irq.c
@@ -69,7 +69,6 @@ static const struct irq_class irqclass_sub_desc[] = {
{.irq = IRQEXT_IUC, .name = "IUC", .desc = "[EXT] IUCV"},
{.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
{.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
- {.irq = IRQEXT_CMR, .name = "CMR", .desc = "[EXT] CPU-Measurement: RI"},
{.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"},
{.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
{.irq = IRQIO_QAI, .name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"},
diff --git a/kernel/arch/s390/kernel/jump_label.c b/kernel/arch/s390/kernel/jump_label.c
index a90299600..083b05f5f 100644
--- a/kernel/arch/s390/kernel/jump_label.c
+++ b/kernel/arch/s390/kernel/jump_label.c
@@ -44,12 +44,9 @@ static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
unsigned char *ipn = (unsigned char *)new;
pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc);
- pr_emerg("Found: %02x %02x %02x %02x %02x %02x\n",
- ipc[0], ipc[1], ipc[2], ipc[3], ipc[4], ipc[5]);
- pr_emerg("Expected: %02x %02x %02x %02x %02x %02x\n",
- ipe[0], ipe[1], ipe[2], ipe[3], ipe[4], ipe[5]);
- pr_emerg("New: %02x %02x %02x %02x %02x %02x\n",
- ipn[0], ipn[1], ipn[2], ipn[3], ipn[4], ipn[5]);
+ pr_emerg("Found: %6ph\n", ipc);
+ pr_emerg("Expected: %6ph\n", ipe);
+ pr_emerg("New: %6ph\n", ipn);
panic("Corrupted kernel text");
}
@@ -64,7 +61,7 @@ static void __jump_label_transform(struct jump_entry *entry,
{
struct insn old, new;
- if (type == JUMP_LABEL_ENABLE) {
+ if (type == JUMP_LABEL_JMP) {
jump_label_make_nop(entry, &old);
jump_label_make_branch(entry, &new);
} else {
diff --git a/kernel/arch/s390/kernel/nmi.c b/kernel/arch/s390/kernel/nmi.c
index 56b550893..07302ce37 100644
--- a/kernel/arch/s390/kernel/nmi.c
+++ b/kernel/arch/s390/kernel/nmi.c
@@ -24,15 +24,17 @@
#include <asm/ctl_reg.h>
struct mcck_struct {
- int kill_task;
- int channel_report;
- int warning;
- unsigned long long mcck_code;
+ unsigned int kill_task : 1;
+ unsigned int channel_report : 1;
+ unsigned int warning : 1;
+ unsigned int etr_queue : 1;
+ unsigned int stp_queue : 1;
+ unsigned long mcck_code;
};
static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
-static void s390_handle_damage(char *msg)
+static void s390_handle_damage(void)
{
smp_send_stop();
disabled_wait((unsigned long) __builtin_return_address(0));
@@ -80,10 +82,14 @@ void s390_handle_mcck(void)
if (xchg(&mchchk_wng_posted, 1) == 0)
kill_cad_pid(SIGPWR, 1);
}
+ if (mcck.etr_queue)
+ etr_queue_work();
+ if (mcck.stp_queue)
+ stp_queue_work();
if (mcck.kill_task) {
local_irq_enable();
printk(KERN_EMERG "mcck: Terminating task because of machine "
- "malfunction (code 0x%016llx).\n", mcck.mcck_code);
+ "malfunction (code 0x%016lx).\n", mcck.mcck_code);
printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
current->comm, current->pid);
do_exit(SIGSEGV);
@@ -95,7 +101,7 @@ EXPORT_SYMBOL_GPL(s390_handle_mcck);
* returns 0 if all registers could be validated
* returns 1 otherwise
*/
-static int notrace s390_revalidate_registers(struct mci *mci)
+static int notrace s390_validate_registers(union mci mci)
{
int kill_task;
u64 zero;
@@ -104,14 +110,14 @@ static int notrace s390_revalidate_registers(struct mci *mci)
kill_task = 0;
zero = 0;
- if (!mci->gr) {
+ if (!mci.gr) {
/*
* General purpose registers couldn't be restored and have
* unknown contents. Process needs to be terminated.
*/
kill_task = 1;
}
- if (!mci->fp) {
+ if (!mci.fp) {
/*
* Floating point registers can't be restored and
* therefore the process needs to be terminated.
@@ -120,7 +126,7 @@ static int notrace s390_revalidate_registers(struct mci *mci)
}
fpt_save_area = &S390_lowcore.floating_pt_save_area;
fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
- if (!mci->fc) {
+ if (!mci.fc) {
/*
* Floating point control register can't be restored.
* Task will be terminated.
@@ -131,7 +137,7 @@ static int notrace s390_revalidate_registers(struct mci *mci)
asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
if (!MACHINE_HAS_VX) {
- /* Revalidate floating point registers */
+ /* Validate floating point registers */
asm volatile(
" ld 0,0(%0)\n"
" ld 1,8(%0)\n"
@@ -151,10 +157,10 @@ static int notrace s390_revalidate_registers(struct mci *mci)
" ld 15,120(%0)\n"
: : "a" (fpt_save_area));
} else {
- /* Revalidate vector registers */
+ /* Validate vector registers */
union ctlreg0 cr0;
- if (!mci->vr) {
+ if (!mci.vr) {
/*
* Vector registers can't be restored and therefore
* the process needs to be terminated.
@@ -164,42 +170,46 @@ static int notrace s390_revalidate_registers(struct mci *mci)
cr0.val = S390_lowcore.cregs_save_area[0];
cr0.afp = cr0.vx = 1;
__ctl_load(cr0.val, 0, 0);
- restore_vx_regs((__vector128 *)
- &S390_lowcore.vector_save_area);
+ asm volatile(
+ " la 1,%0\n"
+ " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */
+ " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */
+ : : "Q" (*(struct vx_array *)
+ &S390_lowcore.vector_save_area) : "1");
__ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
}
- /* Revalidate access registers */
+ /* Validate access registers */
asm volatile(
" lam 0,15,0(%0)"
: : "a" (&S390_lowcore.access_regs_save_area));
- if (!mci->ar) {
+ if (!mci.ar) {
/*
* Access registers have unknown contents.
* Terminating task.
*/
kill_task = 1;
}
- /* Revalidate control registers */
- if (!mci->cr) {
+ /* Validate control registers */
+ if (!mci.cr) {
/*
* Control registers have unknown contents.
* Can't recover and therefore stopping machine.
*/
- s390_handle_damage("invalid control registers.");
+ s390_handle_damage();
} else {
asm volatile(
" lctlg 0,15,0(%0)"
: : "a" (&S390_lowcore.cregs_save_area));
}
/*
- * We don't even try to revalidate the TOD register, since we simply
+ * We don't even try to validate the TOD register, since we simply
* can't write something sensible into that register.
*/
/*
- * See if we can revalidate the TOD programmable register with its
+ * See if we can validate the TOD programmable register with its
* old contents (should be zero) otherwise set it to zero.
*/
- if (!mci->pr)
+ if (!mci.pr)
asm volatile(
" sr 0,0\n"
" sckpf"
@@ -210,17 +220,17 @@ static int notrace s390_revalidate_registers(struct mci *mci)
" sckpf"
: : "a" (&S390_lowcore.tod_progreg_save_area)
: "0", "cc");
- /* Revalidate clock comparator register */
+ /* Validate clock comparator register */
set_clock_comparator(S390_lowcore.clock_comparator);
/* Check if old PSW is valid */
- if (!mci->wp)
+ if (!mci.wp)
/*
* Can't tell if we come from user or kernel mode
* -> stopping machine.
*/
- s390_handle_damage("old psw invalid.");
+ s390_handle_damage();
- if (!mci->ms || !mci->pm || !mci->ia)
+ if (!mci.ms || !mci.pm || !mci.ia)
kill_task = 1;
return kill_task;
@@ -244,21 +254,21 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
static unsigned long long last_ipd;
struct mcck_struct *mcck;
unsigned long long tmp;
- struct mci *mci;
+ union mci mci;
int umode;
nmi_enter();
inc_irq_stat(NMI_NMI);
- mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
+ mci.val = S390_lowcore.mcck_interruption_code;
mcck = this_cpu_ptr(&cpu_mcck);
umode = user_mode(regs);
- if (mci->sd) {
+ if (mci.sd) {
/* System damage -> stopping machine */
- s390_handle_damage("received system damage machine check.");
+ s390_handle_damage();
}
- if (mci->pd) {
- if (mci->b) {
+ if (mci.pd) {
+ if (mci.b) {
/* Processing backup -> verify if we can survive this */
u64 z_mcic, o_mcic, t_mcic;
z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
@@ -266,12 +276,11 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
1ULL<<16);
- t_mcic = *(u64 *)mci;
+ t_mcic = mci.val;
if (((t_mcic & z_mcic) != 0) ||
((t_mcic & o_mcic) != o_mcic)) {
- s390_handle_damage("processing backup machine "
- "check with damage.");
+ s390_handle_damage();
}
/*
@@ -286,64 +295,62 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
ipd_count = 1;
last_ipd = tmp;
if (ipd_count == MAX_IPD_COUNT)
- s390_handle_damage("too many ipd retries.");
+ s390_handle_damage();
spin_unlock(&ipd_lock);
} else {
/* Processing damage -> stopping machine */
- s390_handle_damage("received instruction processing "
- "damage machine check.");
+ s390_handle_damage();
}
}
- if (s390_revalidate_registers(mci)) {
+ if (s390_validate_registers(mci)) {
if (umode) {
/*
* Couldn't restore all register contents while in
* user mode -> mark task for termination.
*/
mcck->kill_task = 1;
- mcck->mcck_code = *(unsigned long long *) mci;
+ mcck->mcck_code = mci.val;
set_cpu_flag(CIF_MCCK_PENDING);
} else {
/*
* Couldn't restore all register contents while in
* kernel mode -> stopping machine.
*/
- s390_handle_damage("unable to revalidate registers.");
+ s390_handle_damage();
}
}
- if (mci->cd) {
+ if (mci.cd) {
/* Timing facility damage */
- s390_handle_damage("TOD clock damaged");
+ s390_handle_damage();
}
- if (mci->ed && mci->ec) {
+ if (mci.ed && mci.ec) {
/* External damage */
if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC))
- etr_sync_check();
+ mcck->etr_queue |= etr_sync_check();
if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH))
- etr_switch_to_local();
+ mcck->etr_queue |= etr_switch_to_local();
if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
- stp_sync_check();
+ mcck->stp_queue |= stp_sync_check();
if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
- stp_island_check();
+ mcck->stp_queue |= stp_island_check();
+ if (mcck->etr_queue || mcck->stp_queue)
+ set_cpu_flag(CIF_MCCK_PENDING);
}
- if (mci->se)
+ if (mci.se)
/* Storage error uncorrected */
- s390_handle_damage("received storage error uncorrected "
- "machine check.");
- if (mci->ke)
+ s390_handle_damage();
+ if (mci.ke)
/* Storage key-error uncorrected */
- s390_handle_damage("received storage key-error uncorrected "
- "machine check.");
- if (mci->ds && mci->fa)
+ s390_handle_damage();
+ if (mci.ds && mci.fa)
/* Storage degradation */
- s390_handle_damage("received storage degradation machine "
- "check.");
- if (mci->cp) {
+ s390_handle_damage();
+ if (mci.cp) {
/* Channel report word pending */
mcck->channel_report = 1;
set_cpu_flag(CIF_MCCK_PENDING);
}
- if (mci->w) {
+ if (mci.w) {
/* Warning pending */
mcck->warning = 1;
set_cpu_flag(CIF_MCCK_PENDING);
@@ -358,4 +365,4 @@ static int __init machine_check_init(void)
ctl_set_bit(14, 24); /* enable warning MCH */
return 0;
}
-arch_initcall(machine_check_init);
+early_initcall(machine_check_init);
diff --git a/kernel/arch/s390/kernel/perf_cpum_cf.c b/kernel/arch/s390/kernel/perf_cpum_cf.c
index 56fdad479..929c147e0 100644
--- a/kernel/arch/s390/kernel/perf_cpum_cf.c
+++ b/kernel/arch/s390/kernel/perf_cpum_cf.c
@@ -72,6 +72,7 @@ struct cpu_hw_events {
atomic_t ctr_set[CPUMF_CTR_SET_MAX];
u64 state, tx_state;
unsigned int flags;
+ unsigned int txn_flags;
};
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
.ctr_set = {
@@ -82,6 +83,7 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
},
.state = 0,
.flags = 0,
+ .txn_flags = 0,
};
static int get_counter_set(u64 event)
@@ -157,10 +159,14 @@ static int validate_ctr_auth(const struct hw_perf_event *hwc)
cpuhw = &get_cpu_var(cpu_hw_events);
- /* check authorization for cpu counter sets */
+ /* Check authorization for cpu counter sets.
+ * If the particular CPU counter set is not authorized,
+ * return with -ENOENT in order to fall back to other
+ * PMUs that might suffice the event request.
+ */
ctrs_state = cpumf_state_ctl[hwc->config_base];
if (!(ctrs_state & cpuhw->info.auth_ctl))
- err = -EPERM;
+ err = -ENOENT;
put_cpu_var(cpu_hw_events);
return err;
@@ -534,9 +540,9 @@ static int cpumf_pmu_add(struct perf_event *event, int flags)
* For group events transaction, the authorization check is
* done in cpumf_pmu_commit_txn().
*/
- if (!(cpuhw->flags & PERF_EVENT_TXN))
+ if (!(cpuhw->txn_flags & PERF_PMU_TXN_ADD))
if (validate_ctr_auth(&event->hw))
- return -EPERM;
+ return -ENOENT;
ctr_set_enable(&cpuhw->state, event->hw.config_base);
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
@@ -572,13 +578,22 @@ static void cpumf_pmu_del(struct perf_event *event, int flags)
/*
* Start group events scheduling transaction.
* Set flags to perform a single test at commit time.
+ *
+ * We only support PERF_PMU_TXN_ADD transactions. Save the
+ * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
+ * transactions.
*/
-static void cpumf_pmu_start_txn(struct pmu *pmu)
+static void cpumf_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
{
struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+ WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */
+
+ cpuhw->txn_flags = txn_flags;
+ if (txn_flags & ~PERF_PMU_TXN_ADD)
+ return;
+
perf_pmu_disable(pmu);
- cpuhw->flags |= PERF_EVENT_TXN;
cpuhw->tx_state = cpuhw->state;
}
@@ -589,11 +604,18 @@ static void cpumf_pmu_start_txn(struct pmu *pmu)
*/
static void cpumf_pmu_cancel_txn(struct pmu *pmu)
{
+ unsigned int txn_flags;
struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+ WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */
+
+ txn_flags = cpuhw->txn_flags;
+ cpuhw->txn_flags = 0;
+ if (txn_flags & ~PERF_PMU_TXN_ADD)
+ return;
+
WARN_ON(cpuhw->tx_state != cpuhw->state);
- cpuhw->flags &= ~PERF_EVENT_TXN;
perf_pmu_enable(pmu);
}
@@ -607,13 +629,20 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu)
struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
u64 state;
+ WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */
+
+ if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) {
+ cpuhw->txn_flags = 0;
+ return 0;
+ }
+
/* check if the updated state can be scheduled */
state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
state >>= CPUMF_LCCTL_ENABLE_SHIFT;
if ((state & cpuhw->info.auth_ctl) != state)
- return -EPERM;
+ return -ENOENT;
- cpuhw->flags &= ~PERF_EVENT_TXN;
+ cpuhw->txn_flags = 0;
perf_pmu_enable(pmu);
return 0;
}
diff --git a/kernel/arch/s390/kernel/perf_cpum_sf.c b/kernel/arch/s390/kernel/perf_cpum_sf.c
index e6a1578fc..3d8da1e74 100644
--- a/kernel/arch/s390/kernel/perf_cpum_sf.c
+++ b/kernel/arch/s390/kernel/perf_cpum_sf.c
@@ -1019,14 +1019,13 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
break;
}
- /* The host-program-parameter (hpp) contains the sie control
- * block that is set by sie64a() in entry64.S. Check if hpp
- * refers to a valid control block and set sde_regs flags
- * accordingly. This would allow to use hpp values for other
- * purposes too.
- * For now, simply use a non-zero value as guest indicator.
+ /*
+ * A non-zero guest program parameter indicates a guest
+ * sample.
+ * Note that some early samples might be misaccounted to
+ * the host.
*/
- if (sfr->basic.hpp)
+ if (sfr->basic.gpp)
sde_regs->in_guest = 1;
overflow = 0;
@@ -1572,7 +1571,7 @@ static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
}
#define param_check_sfb_size(name, p) __param_check(name, p, void)
-static struct kernel_param_ops param_ops_sfb_size = {
+static const struct kernel_param_ops param_ops_sfb_size = {
.set = param_set_sfb_size,
.get = param_get_sfb_size,
};
diff --git a/kernel/arch/s390/kernel/process.c b/kernel/arch/s390/kernel/process.c
index 8f587d871..114ee8b96 100644
--- a/kernel/arch/s390/kernel/process.c
+++ b/kernel/arch/s390/kernel/process.c
@@ -23,6 +23,7 @@
#include <linux/kprobes.h>
#include <linux/random.h>
#include <linux/module.h>
+#include <linux/init_task.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/vtimer.h>
@@ -36,6 +37,9 @@
asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
+/* FPU save area for the init task */
+__vector128 init_task_fpu_regs[__NUM_VXRS] __init_task_data;
+
/*
* Return saved PC of a blocked thread. used in kernel/sched.
* resume in entry.S does not create a new stack frame, it
@@ -81,8 +85,36 @@ void release_thread(struct task_struct *dead_task)
void arch_release_task_struct(struct task_struct *tsk)
{
- if (tsk->thread.vxrs)
- kfree(tsk->thread.vxrs);
+ /* Free either the floating-point or the vector register save area */
+ kfree(tsk->thread.fpu.regs);
+}
+
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+{
+ size_t fpu_regs_size;
+
+ *dst = *src;
+
+ /*
+ * If the vector extension is available, it is enabled for all tasks,
+ * and, thus, the FPU register save area must be allocated accordingly.
+ */
+ fpu_regs_size = MACHINE_HAS_VX ? sizeof(__vector128) * __NUM_VXRS
+ : sizeof(freg_t) * __NUM_FPRS;
+ dst->thread.fpu.regs = kzalloc(fpu_regs_size, GFP_KERNEL|__GFP_REPEAT);
+ if (!dst->thread.fpu.regs)
+ return -ENOMEM;
+
+ /*
+ * Save the floating-point or vector register state of the current
+ * task and set the CIF_FPU flag to lazy restore the FPU register
+ * state when returning to user space.
+ */
+ save_fpu_regs();
+ dst->thread.fpu.fpc = current->thread.fpu.fpc;
+ memcpy(dst->thread.fpu.regs, current->thread.fpu.regs, fpu_regs_size);
+
+ return 0;
}
int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
@@ -139,14 +171,8 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
/* Don't copy runtime instrumentation info */
p->thread.ri_cb = NULL;
- p->thread.ri_signum = 0;
frame->childregs.psw.mask &= ~PSW_MASK_RI;
- /* Save the fpu registers to new thread structure. */
- save_fp_ctl(&p->thread.fp_regs.fpc);
- save_fp_regs(p->thread.fp_regs.fprs);
- p->thread.fp_regs.pad = 0;
- p->thread.vxrs = NULL;
/* Set a new TLS ? */
if (clone_flags & CLONE_SETTLS) {
unsigned long tls = frame->childregs.gprs[6];
@@ -162,7 +188,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
asmlinkage void execve_tail(void)
{
- current->thread.fp_regs.fpc = 0;
+ current->thread.fpu.fpc = 0;
asm volatile("sfpc %0" : : "d" (0));
}
@@ -171,8 +197,15 @@ asmlinkage void execve_tail(void)
*/
int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
{
- save_fp_ctl(&fpregs->fpc);
- save_fp_regs(fpregs->fprs);
+ save_fpu_regs();
+ fpregs->fpc = current->thread.fpu.fpc;
+ fpregs->pad = 0;
+ if (MACHINE_HAS_VX)
+ convert_vx_to_fp((freg_t *)&fpregs->fprs,
+ current->thread.fpu.vxrs);
+ else
+ memcpy(&fpregs->fprs, current->thread.fpu.fprs,
+ sizeof(fpregs->fprs));
return 1;
}
EXPORT_SYMBOL(dump_fpu);
@@ -210,11 +243,7 @@ unsigned long arch_align_stack(unsigned long sp)
static inline unsigned long brk_rnd(void)
{
- /* 8MB for 32bit, 1GB for 64bit */
- if (is_32bit_task())
- return (get_random_int() & 0x7ffUL) << PAGE_SHIFT;
- else
- return (get_random_int() & 0x3ffffUL) << PAGE_SHIFT;
+ return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT;
}
unsigned long arch_randomize_brk(struct mm_struct *mm)
diff --git a/kernel/arch/s390/kernel/processor.c b/kernel/arch/s390/kernel/processor.c
index dc488e13b..7ce00e7a7 100644
--- a/kernel/arch/s390/kernel/processor.c
+++ b/kernel/arch/s390/kernel/processor.c
@@ -11,6 +11,7 @@
#include <linux/seq_file.h>
#include <linux/delay.h>
#include <linux/cpu.h>
+#include <asm/diag.h>
#include <asm/elf.h>
#include <asm/lowcore.h>
#include <asm/param.h>
@@ -20,8 +21,10 @@ static DEFINE_PER_CPU(struct cpuid, cpu_id);
void notrace cpu_relax(void)
{
- if (!smp_cpu_mtid && MACHINE_HAS_DIAG44)
+ if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) {
+ diag_stat_inc(DIAG_STAT_X044);
asm volatile("diag 0,0,0x44");
+ }
barrier();
}
EXPORT_SYMBOL(cpu_relax);
@@ -41,6 +44,15 @@ void cpu_init(void)
}
/*
+ * cpu_have_feature - Test CPU features on module initialization
+ */
+int cpu_have_feature(unsigned int num)
+{
+ return elf_hwcap & (1UL << num);
+}
+EXPORT_SYMBOL(cpu_have_feature);
+
+/*
* show_cpuinfo - Get information on one CPU for use by procfs.
*/
static int show_cpuinfo(struct seq_file *m, void *v)
diff --git a/kernel/arch/s390/kernel/ptrace.c b/kernel/arch/s390/kernel/ptrace.c
index d363c9c32..01c37b36c 100644
--- a/kernel/arch/s390/kernel/ptrace.c
+++ b/kernel/arch/s390/kernel/ptrace.c
@@ -45,39 +45,27 @@ void update_cr_regs(struct task_struct *task)
struct per_regs old, new;
/* Take care of the enable/disable of transactional execution. */
- if (MACHINE_HAS_TE || MACHINE_HAS_VX) {
+ if (MACHINE_HAS_TE) {
unsigned long cr, cr_new;
__ctl_store(cr, 0, 0);
- cr_new = cr;
- if (MACHINE_HAS_TE) {
- /* Set or clear transaction execution TXC bit 8. */
- cr_new |= (1UL << 55);
- if (task->thread.per_flags & PER_FLAG_NO_TE)
- cr_new &= ~(1UL << 55);
- }
- if (MACHINE_HAS_VX) {
- /* Enable/disable of vector extension */
- cr_new &= ~(1UL << 17);
- if (task->thread.vxrs)
- cr_new |= (1UL << 17);
- }
+ /* Set or clear transaction execution TXC bit 8. */
+ cr_new = cr | (1UL << 55);
+ if (task->thread.per_flags & PER_FLAG_NO_TE)
+ cr_new &= ~(1UL << 55);
if (cr_new != cr)
__ctl_load(cr_new, 0, 0);
- if (MACHINE_HAS_TE) {
- /* Set/clear transaction execution TDC bits 62/63. */
- __ctl_store(cr, 2, 2);
- cr_new = cr & ~3UL;
- if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
- if (task->thread.per_flags &
- PER_FLAG_TE_ABORT_RAND_TEND)
- cr_new |= 1UL;
- else
- cr_new |= 2UL;
- }
- if (cr_new != cr)
- __ctl_load(cr_new, 2, 2);
+ /* Set or clear transaction execution TDC bits 62 and 63. */
+ __ctl_store(cr, 2, 2);
+ cr_new = cr & ~3UL;
+ if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+ if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
+ cr_new |= 1UL;
+ else
+ cr_new |= 2UL;
}
+ if (cr_new != cr)
+ __ctl_load(cr_new, 2, 2);
}
/* Copy user specified PER registers */
new.control = thread->per_user.control;
@@ -242,21 +230,21 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
/*
* floating point control reg. is in the thread structure
*/
- tmp = child->thread.fp_regs.fpc;
+ tmp = child->thread.fpu.fpc;
tmp <<= BITS_PER_LONG - 32;
} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
/*
- * floating point regs. are either in child->thread.fp_regs
- * or the child->thread.vxrs array
+ * floating point regs. are either in child->thread.fpu
+ * or the child->thread.fpu.vxrs array
*/
offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
- if (child->thread.vxrs)
+ if (MACHINE_HAS_VX)
tmp = *(addr_t *)
- ((addr_t) child->thread.vxrs + 2*offset);
+ ((addr_t) child->thread.fpu.vxrs + 2*offset);
else
tmp = *(addr_t *)
- ((addr_t) &child->thread.fp_regs.fprs + offset);
+ ((addr_t) child->thread.fpu.fprs + offset);
} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
/*
@@ -387,20 +375,20 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
if ((unsigned int) data != 0 ||
test_fp_ctl(data >> (BITS_PER_LONG - 32)))
return -EINVAL;
- child->thread.fp_regs.fpc = data >> (BITS_PER_LONG - 32);
+ child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32);
} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
/*
- * floating point regs. are either in child->thread.fp_regs
- * or the child->thread.vxrs array
+ * floating point regs. are either in child->thread.fpu
+ * or the child->thread.fpu.vxrs array
*/
offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
- if (child->thread.vxrs)
+ if (MACHINE_HAS_VX)
*(addr_t *)((addr_t)
- child->thread.vxrs + 2*offset) = data;
+ child->thread.fpu.vxrs + 2*offset) = data;
else
*(addr_t *)((addr_t)
- &child->thread.fp_regs.fprs + offset) = data;
+ child->thread.fpu.fprs + offset) = data;
} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
/*
@@ -621,20 +609,20 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
/*
* floating point control reg. is in the thread structure
*/
- tmp = child->thread.fp_regs.fpc;
+ tmp = child->thread.fpu.fpc;
} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
/*
- * floating point regs. are either in child->thread.fp_regs
- * or the child->thread.vxrs array
+ * floating point regs. are either in child->thread.fpu
+ * or the child->thread.fpu.vxrs array
*/
offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
- if (child->thread.vxrs)
+ if (MACHINE_HAS_VX)
tmp = *(__u32 *)
- ((addr_t) child->thread.vxrs + 2*offset);
+ ((addr_t) child->thread.fpu.vxrs + 2*offset);
else
tmp = *(__u32 *)
- ((addr_t) &child->thread.fp_regs.fprs + offset);
+ ((addr_t) child->thread.fpu.fprs + offset);
} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
/*
@@ -746,20 +734,20 @@ static int __poke_user_compat(struct task_struct *child,
*/
if (test_fp_ctl(tmp))
return -EINVAL;
- child->thread.fp_regs.fpc = data;
+ child->thread.fpu.fpc = data;
} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
/*
- * floating point regs. are either in child->thread.fp_regs
- * or the child->thread.vxrs array
+ * floating point regs. are either in child->thread.fpu
+ * or the child->thread.fpu.vxrs array
*/
offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
- if (child->thread.vxrs)
+ if (MACHINE_HAS_VX)
*(__u32 *)((addr_t)
- child->thread.vxrs + 2*offset) = tmp;
+ child->thread.fpu.vxrs + 2*offset) = tmp;
else
*(__u32 *)((addr_t)
- &child->thread.fp_regs.fprs + offset) = tmp;
+ child->thread.fpu.fprs + offset) = tmp;
} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
/*
@@ -952,18 +940,16 @@ static int s390_fpregs_get(struct task_struct *target,
const struct user_regset *regset, unsigned int pos,
unsigned int count, void *kbuf, void __user *ubuf)
{
- if (target == current) {
- save_fp_ctl(&target->thread.fp_regs.fpc);
- save_fp_regs(target->thread.fp_regs.fprs);
- } else if (target->thread.vxrs) {
- int i;
+ _s390_fp_regs fp_regs;
+
+ if (target == current)
+ save_fpu_regs();
+
+ fp_regs.fpc = target->thread.fpu.fpc;
+ fpregs_store(&fp_regs, &target->thread.fpu);
- for (i = 0; i < __NUM_VXRS_LOW; i++)
- target->thread.fp_regs.fprs[i] =
- *(freg_t *)(target->thread.vxrs + i);
- }
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fp_regs, 0, -1);
+ &fp_regs, 0, -1);
}
static int s390_fpregs_set(struct task_struct *target,
@@ -972,41 +958,33 @@ static int s390_fpregs_set(struct task_struct *target,
const void __user *ubuf)
{
int rc = 0;
+ freg_t fprs[__NUM_FPRS];
- if (target == current) {
- save_fp_ctl(&target->thread.fp_regs.fpc);
- save_fp_regs(target->thread.fp_regs.fprs);
- }
+ if (target == current)
+ save_fpu_regs();
/* If setting FPC, must validate it first. */
if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
- u32 ufpc[2] = { target->thread.fp_regs.fpc, 0 };
+ u32 ufpc[2] = { target->thread.fpu.fpc, 0 };
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
0, offsetof(s390_fp_regs, fprs));
if (rc)
return rc;
if (ufpc[1] != 0 || test_fp_ctl(ufpc[0]))
return -EINVAL;
- target->thread.fp_regs.fpc = ufpc[0];
+ target->thread.fpu.fpc = ufpc[0];
}
if (rc == 0 && count > 0)
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- target->thread.fp_regs.fprs,
- offsetof(s390_fp_regs, fprs), -1);
-
- if (rc == 0) {
- if (target == current) {
- restore_fp_ctl(&target->thread.fp_regs.fpc);
- restore_fp_regs(target->thread.fp_regs.fprs);
- } else if (target->thread.vxrs) {
- int i;
-
- for (i = 0; i < __NUM_VXRS_LOW; i++)
- *(freg_t *)(target->thread.vxrs + i) =
- target->thread.fp_regs.fprs[i];
- }
- }
+ fprs, offsetof(s390_fp_regs, fprs), -1);
+ if (rc)
+ return rc;
+
+ if (MACHINE_HAS_VX)
+ convert_fp_to_vx(target->thread.fpu.vxrs, fprs);
+ else
+ memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs));
return rc;
}
@@ -1069,13 +1047,10 @@ static int s390_vxrs_low_get(struct task_struct *target,
if (!MACHINE_HAS_VX)
return -ENODEV;
- if (target->thread.vxrs) {
- if (target == current)
- save_vx_regs(target->thread.vxrs);
- for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = *((__u64 *)(target->thread.vxrs + i) + 1);
- } else
- memset(vxrs, 0, sizeof(vxrs));
+ if (target == current)
+ save_fpu_regs();
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
}
@@ -1089,20 +1064,13 @@ static int s390_vxrs_low_set(struct task_struct *target,
if (!MACHINE_HAS_VX)
return -ENODEV;
- if (!target->thread.vxrs) {
- rc = alloc_vector_registers(target);
- if (rc)
- return rc;
- } else if (target == current)
- save_vx_regs(target->thread.vxrs);
+ if (target == current)
+ save_fpu_regs();
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
- if (rc == 0) {
+ if (rc == 0)
for (i = 0; i < __NUM_VXRS_LOW; i++)
- *((__u64 *)(target->thread.vxrs + i) + 1) = vxrs[i];
- if (target == current)
- restore_vx_regs(target->thread.vxrs);
- }
+ *((__u64 *)(target->thread.fpu.vxrs + i) + 1) = vxrs[i];
return rc;
}
@@ -1116,13 +1084,10 @@ static int s390_vxrs_high_get(struct task_struct *target,
if (!MACHINE_HAS_VX)
return -ENODEV;
- if (target->thread.vxrs) {
- if (target == current)
- save_vx_regs(target->thread.vxrs);
- memcpy(vxrs, target->thread.vxrs + __NUM_VXRS_LOW,
- sizeof(vxrs));
- } else
- memset(vxrs, 0, sizeof(vxrs));
+ if (target == current)
+ save_fpu_regs();
+ memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW, sizeof(vxrs));
+
return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
}
@@ -1135,18 +1100,11 @@ static int s390_vxrs_high_set(struct task_struct *target,
if (!MACHINE_HAS_VX)
return -ENODEV;
- if (!target->thread.vxrs) {
- rc = alloc_vector_registers(target);
- if (rc)
- return rc;
- } else if (target == current)
- save_vx_regs(target->thread.vxrs);
+ if (target == current)
+ save_fpu_regs();
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- target->thread.vxrs + __NUM_VXRS_LOW, 0, -1);
- if (rc == 0 && target == current)
- restore_vx_regs(target->thread.vxrs);
-
+ target->thread.fpu.vxrs + __NUM_VXRS_LOW, 0, -1);
return rc;
}
diff --git a/kernel/arch/s390/kernel/runtime_instr.c b/kernel/arch/s390/kernel/runtime_instr.c
index 26b4ae96f..fffa0e546 100644
--- a/kernel/arch/s390/kernel/runtime_instr.c
+++ b/kernel/arch/s390/kernel/runtime_instr.c
@@ -18,11 +18,6 @@
/* empty control block to disable RI by loading it */
struct runtime_instr_cb runtime_instr_empty_cb;
-static int runtime_instr_avail(void)
-{
- return test_facility(64);
-}
-
static void disable_runtime_instr(void)
{
struct pt_regs *regs = task_pt_regs(current);
@@ -40,7 +35,6 @@ static void disable_runtime_instr(void)
static void init_runtime_instr_cb(struct runtime_instr_cb *cb)
{
cb->buf_limit = 0xfff;
- cb->int_requested = 1;
cb->pstate = 1;
cb->pstate_set_buf = 1;
cb->pstate_sample = 1;
@@ -57,46 +51,14 @@ void exit_thread_runtime_instr(void)
return;
disable_runtime_instr();
kfree(task->thread.ri_cb);
- task->thread.ri_signum = 0;
task->thread.ri_cb = NULL;
}
-static void runtime_instr_int_handler(struct ext_code ext_code,
- unsigned int param32, unsigned long param64)
-{
- struct siginfo info;
-
- if (!(param32 & CPU_MF_INT_RI_MASK))
- return;
-
- inc_irq_stat(IRQEXT_CMR);
-
- if (!current->thread.ri_cb)
- return;
- if (current->thread.ri_signum < SIGRTMIN ||
- current->thread.ri_signum > SIGRTMAX) {
- WARN_ON_ONCE(1);
- return;
- }
-
- memset(&info, 0, sizeof(info));
- info.si_signo = current->thread.ri_signum;
- info.si_code = SI_QUEUE;
- if (param32 & CPU_MF_INT_RI_BUF_FULL)
- info.si_int = ENOBUFS;
- else if (param32 & CPU_MF_INT_RI_HALTED)
- info.si_int = ECANCELED;
- else
- return; /* unknown reason */
-
- send_sig_info(current->thread.ri_signum, &info, current);
-}
-
-SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
+SYSCALL_DEFINE1(s390_runtime_instr, int, command)
{
struct runtime_instr_cb *cb;
- if (!runtime_instr_avail())
+ if (!test_facility(64))
return -EOPNOTSUPP;
if (command == S390_RUNTIME_INSTR_STOP) {
@@ -106,8 +68,7 @@ SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
return 0;
}
- if (command != S390_RUNTIME_INSTR_START ||
- (signum < SIGRTMIN || signum > SIGRTMAX))
+ if (command != S390_RUNTIME_INSTR_START)
return -EINVAL;
if (!current->thread.ri_cb) {
@@ -120,7 +81,6 @@ SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
}
init_runtime_instr_cb(cb);
- current->thread.ri_signum = signum;
/* now load the control block to make it available */
preempt_disable();
@@ -129,21 +89,3 @@ SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
preempt_enable();
return 0;
}
-
-static int __init runtime_instr_init(void)
-{
- int rc;
-
- if (!runtime_instr_avail())
- return 0;
-
- irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
- rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
- runtime_instr_int_handler);
- if (rc)
- irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
- else
- pr_info("Runtime instrumentation facility initialized\n");
- return rc;
-}
-device_initcall(runtime_instr_init);
diff --git a/kernel/arch/s390/kernel/s390_ksyms.c b/kernel/arch/s390/kernel/s390_ksyms.c
index 9f6046793..e67453b73 100644
--- a/kernel/arch/s390/kernel/s390_ksyms.c
+++ b/kernel/arch/s390/kernel/s390_ksyms.c
@@ -1,5 +1,6 @@
#include <linux/module.h>
#include <linux/kvm_host.h>
+#include <asm/fpu/api.h>
#include <asm/ftrace.h>
#ifdef CONFIG_FUNCTION_TRACER
@@ -8,6 +9,7 @@ EXPORT_SYMBOL(_mcount);
#if IS_ENABLED(CONFIG_KVM)
EXPORT_SYMBOL(sie64a);
EXPORT_SYMBOL(sie_exit);
+EXPORT_SYMBOL(save_fpu_regs);
#endif
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset);
diff --git a/kernel/arch/s390/kernel/sclp.S b/kernel/arch/s390/kernel/sclp.S
deleted file mode 100644
index ada0c07fe..000000000
--- a/kernel/arch/s390/kernel/sclp.S
+++ /dev/null
@@ -1,355 +0,0 @@
-/*
- * Mini SCLP driver.
- *
- * Copyright IBM Corp. 2004, 2009
- *
- * Author(s): Peter Oberparleiter <Peter.Oberparleiter@de.ibm.com>,
- * Heiko Carstens <heiko.carstens@de.ibm.com>,
- *
- */
-
-#include <linux/linkage.h>
-#include <asm/irq.h>
-
-LC_EXT_NEW_PSW = 0x58 # addr of ext int handler
-LC_EXT_NEW_PSW_64 = 0x1b0 # addr of ext int handler 64 bit
-LC_EXT_INT_PARAM = 0x80 # addr of ext int parameter
-LC_EXT_INT_CODE = 0x86 # addr of ext int code
-LC_AR_MODE_ID = 0xa3
-
-#
-# Subroutine which waits synchronously until either an external interruption
-# or a timeout occurs.
-#
-# Parameters:
-# R2 = 0 for no timeout, non-zero for timeout in (approximated) seconds
-#
-# Returns:
-# R2 = 0 on interrupt, 2 on timeout
-# R3 = external interruption parameter if R2=0
-#
-
-_sclp_wait_int:
- stm %r6,%r15,24(%r15) # save registers
- basr %r13,0 # get base register
-.LbaseS1:
- ahi %r15,-96 # create stack frame
- la %r8,LC_EXT_NEW_PSW # register int handler
- la %r9,.LextpswS1-.LbaseS1(%r13)
- tm LC_AR_MODE_ID,1
- jno .Lesa1
- la %r8,LC_EXT_NEW_PSW_64 # register int handler 64 bit
- la %r9,.LextpswS1_64-.LbaseS1(%r13)
-.Lesa1:
- mvc .LoldpswS1-.LbaseS1(16,%r13),0(%r8)
- mvc 0(16,%r8),0(%r9)
- epsw %r6,%r7 # set current addressing mode
- nill %r6,0x1 # in new psw (31 or 64 bit mode)
- nilh %r7,0x8000
- stm %r6,%r7,0(%r8)
- lhi %r6,0x0200 # cr mask for ext int (cr0.54)
- ltr %r2,%r2
- jz .LsetctS1
- ahi %r6,0x0800 # cr mask for clock int (cr0.52)
- stck .LtimeS1-.LbaseS1(%r13) # initiate timeout
- al %r2,.LtimeS1-.LbaseS1(%r13)
- st %r2,.LtimeS1-.LbaseS1(%r13)
- sckc .LtimeS1-.LbaseS1(%r13)
-
-.LsetctS1:
- stctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # enable required interrupts
- l %r0,.LctlS1-.LbaseS1(%r13)
- lhi %r1,~(0x200 | 0x800) # clear old values
- nr %r1,%r0
- or %r1,%r6 # set new value
- st %r1,.LctlS1-.LbaseS1(%r13)
- lctl %c0,%c0,.LctlS1-.LbaseS1(%r13)
- st %r0,.LctlS1-.LbaseS1(%r13)
- lhi %r2,2 # return code for timeout
-.LloopS1:
- lpsw .LwaitpswS1-.LbaseS1(%r13) # wait until interrupt
-.LwaitS1:
- lh %r7,LC_EXT_INT_CODE
- chi %r7,EXT_IRQ_CLK_COMP # timeout?
- je .LtimeoutS1
- chi %r7,EXT_IRQ_SERVICE_SIG # service int?
- jne .LloopS1
- sr %r2,%r2
- l %r3,LC_EXT_INT_PARAM
-.LtimeoutS1:
- lctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # restore interrupt setting
- # restore old handler
- mvc 0(16,%r8),.LoldpswS1-.LbaseS1(%r13)
- lm %r6,%r15,120(%r15) # restore registers
- br %r14 # return to caller
-
- .align 8
-.LoldpswS1:
- .long 0, 0, 0, 0 # old ext int PSW
-.LextpswS1:
- .long 0x00080000, 0x80000000+.LwaitS1 # PSW to handle ext int
-.LextpswS1_64:
- .quad 0, .LwaitS1 # PSW to handle ext int, 64 bit
-.LwaitpswS1:
- .long 0x010a0000, 0x00000000+.LloopS1 # PSW to wait for ext int
-.LtimeS1:
- .quad 0 # current time
-.LctlS1:
- .long 0 # CT0 contents
-
-#
-# Subroutine to synchronously issue a service call.
-#
-# Parameters:
-# R2 = command word
-# R3 = sccb address
-#
-# Returns:
-# R2 = 0 on success, 1 on failure
-# R3 = sccb response code if R2 = 0
-#
-
-_sclp_servc:
- stm %r6,%r15,24(%r15) # save registers
- ahi %r15,-96 # create stack frame
- lr %r6,%r2 # save command word
- lr %r7,%r3 # save sccb address
-.LretryS2:
- lhi %r2,1 # error return code
- .insn rre,0xb2200000,%r6,%r7 # servc
- brc 1,.LendS2 # exit if not operational
- brc 8,.LnotbusyS2 # go on if not busy
- sr %r2,%r2 # wait until no longer busy
- bras %r14,_sclp_wait_int
- j .LretryS2 # retry
-.LnotbusyS2:
- sr %r2,%r2 # wait until result
- bras %r14,_sclp_wait_int
- sr %r2,%r2
- lh %r3,6(%r7)
-.LendS2:
- lm %r6,%r15,120(%r15) # restore registers
- br %r14
-
-#
-# Subroutine to set up the SCLP interface.
-#
-# Parameters:
-# R2 = 0 to activate, non-zero to deactivate
-#
-# Returns:
-# R2 = 0 on success, non-zero on failure
-#
-
-_sclp_setup:
- stm %r6,%r15,24(%r15) # save registers
- ahi %r15,-96 # create stack frame
- basr %r13,0 # get base register
-.LbaseS3:
- l %r6,.LsccbS0-.LbaseS3(%r13) # prepare init mask sccb
- mvc 0(.LinitendS3-.LinitsccbS3,%r6),.LinitsccbS3-.LbaseS3(%r13)
- ltr %r2,%r2 # initialization?
- jz .LdoinitS3 # go ahead
- # clear masks
- xc .LinitmaskS3-.LinitsccbS3(8,%r6),.LinitmaskS3-.LinitsccbS3(%r6)
-.LdoinitS3:
- l %r2,.LwritemaskS3-.LbaseS3(%r13)# get command word
- lr %r3,%r6 # get sccb address
- bras %r14,_sclp_servc # issue service call
- ltr %r2,%r2 # servc successful?
- jnz .LerrorS3
- chi %r3,0x20 # write mask successful?
- jne .LerrorS3
- # check masks
- la %r2,.LinitmaskS3-.LinitsccbS3(%r6)
- l %r1,0(%r2) # receive mask ok?
- n %r1,12(%r2)
- cl %r1,0(%r2)
- jne .LerrorS3
- l %r1,4(%r2) # send mask ok?
- n %r1,8(%r2)
- cl %r1,4(%r2)
- sr %r2,%r2
- je .LendS3
-.LerrorS3:
- lhi %r2,1 # error return code
-.LendS3:
- lm %r6,%r15,120(%r15) # restore registers
- br %r14
-.LwritemaskS3:
- .long 0x00780005 # SCLP command for write mask
-.LinitsccbS3:
- .word .LinitendS3-.LinitsccbS3
- .byte 0,0,0,0
- .word 0
- .word 0
- .word 4
-.LinitmaskS3:
- .long 0x80000000
- .long 0x40000000
- .long 0
- .long 0
-.LinitendS3:
-
-#
-# Subroutine which prints a given text to the SCLP console.
-#
-# Parameters:
-# R2 = address of nil-terminated ASCII text
-#
-# Returns:
-# R2 = 0 on success, 1 on failure
-#
-
-_sclp_print:
- stm %r6,%r15,24(%r15) # save registers
- ahi %r15,-96 # create stack frame
- basr %r13,0 # get base register
-.LbaseS4:
- l %r8,.LsccbS0-.LbaseS4(%r13) # prepare write data sccb
- mvc 0(.LmtoS4-.LwritesccbS4,%r8),.LwritesccbS4-.LbaseS4(%r13)
- la %r7,.LmtoS4-.LwritesccbS4(%r8) # current mto addr
- sr %r0,%r0
- l %r10,.Lascebc-.LbaseS4(%r13) # address of translation table
-.LinitmtoS4:
- # initialize mto
- mvc 0(.LmtoendS4-.LmtoS4,%r7),.LmtoS4-.LbaseS4(%r13)
- lhi %r6,.LmtoendS4-.LmtoS4 # current mto length
-.LloopS4:
- ic %r0,0(%r2) # get character
- ahi %r2,1
- ltr %r0,%r0 # end of string?
- jz .LfinalizemtoS4
- chi %r0,0x0a # end of line (NL)?
- jz .LfinalizemtoS4
- stc %r0,0(%r6,%r7) # copy to mto
- la %r11,0(%r6,%r7)
- tr 0(1,%r11),0(%r10) # translate to EBCDIC
- ahi %r6,1
- j .LloopS4
-.LfinalizemtoS4:
- sth %r6,0(%r7) # update mto length
- lh %r9,.LmdbS4-.LwritesccbS4(%r8) # update mdb length
- ar %r9,%r6
- sth %r9,.LmdbS4-.LwritesccbS4(%r8)
- lh %r9,.LevbufS4-.LwritesccbS4(%r8)# update evbuf length
- ar %r9,%r6
- sth %r9,.LevbufS4-.LwritesccbS4(%r8)
- lh %r9,0(%r8) # update sccb length
- ar %r9,%r6
- sth %r9,0(%r8)
- ar %r7,%r6 # update current mto address
- ltr %r0,%r0 # more characters?
- jnz .LinitmtoS4
- l %r2,.LwritedataS4-.LbaseS4(%r13)# write data
- lr %r3,%r8
- bras %r14,_sclp_servc
- ltr %r2,%r2 # servc successful?
- jnz .LendS4
- chi %r3,0x20 # write data successful?
- je .LendS4
- lhi %r2,1 # error return code
-.LendS4:
- lm %r6,%r15,120(%r15) # restore registers
- br %r14
-
-#
-# Function which prints a given text to the SCLP console.
-#
-# Parameters:
-# R2 = address of nil-terminated ASCII text
-#
-# Returns:
-# R2 = 0 on success, 1 on failure
-#
-
-ENTRY(_sclp_print_early)
- stm %r6,%r15,24(%r15) # save registers
- ahi %r15,-96 # create stack frame
- tm LC_AR_MODE_ID,1
- jno .Lesa2
- ahi %r15,-80
- stmh %r6,%r15,96(%r15) # store upper register halves
- basr %r13,0
- lmh %r0,%r15,.Lzeroes-.(%r13) # clear upper register halves
-.Lesa2:
- lr %r10,%r2 # save string pointer
- lhi %r2,0
- bras %r14,_sclp_setup # enable console
- ltr %r2,%r2
- jnz .LendS5
- lr %r2,%r10
- bras %r14,_sclp_print # print string
- ltr %r2,%r2
- jnz .LendS5
- lhi %r2,1
- bras %r14,_sclp_setup # disable console
-.LendS5:
- tm LC_AR_MODE_ID,1
- jno .Lesa3
- lgfr %r2,%r2 # sign extend return value
- lmh %r6,%r15,96(%r15) # restore upper register halves
- ahi %r15,80
-.Lesa3:
- lm %r6,%r15,120(%r15) # restore registers
- br %r14
-.Lzeroes:
- .fill 64,4,0
-
-.LwritedataS4:
- .long 0x00760005 # SCLP command for write data
-.LwritesccbS4:
- # sccb
- .word .LmtoS4-.LwritesccbS4
- .byte 0
- .byte 0,0,0
- .word 0
-
- # evbuf
-.LevbufS4:
- .word .LmtoS4-.LevbufS4
- .byte 0x02
- .byte 0
- .word 0
-
-.LmdbS4:
- # mdb
- .word .LmtoS4-.LmdbS4
- .word 1
- .long 0xd4c4c240
- .long 1
-
- # go
-.LgoS4:
- .word .LmtoS4-.LgoS4
- .word 1
- .long 0
- .byte 0,0,0,0,0,0,0,0
- .byte 0,0,0
- .byte 0
- .byte 0,0,0,0,0,0,0
- .byte 0
- .word 0
- .byte 0,0,0,0,0,0,0,0,0,0
- .byte 0,0,0,0,0,0,0,0
- .byte 0,0,0,0,0,0,0,0
-
-.LmtoS4:
- .word .LmtoendS4-.LmtoS4
- .word 4
- .word 0x1000
- .byte 0
- .byte 0,0,0
-.LmtoendS4:
-
- # Global constants
-.LsccbS0:
- .long _sclp_work_area
-.Lascebc:
- .long _ascebc
-
-.section .data,"aw",@progbits
- .balign 4096
-_sclp_work_area:
- .fill 4096
-.previous
diff --git a/kernel/arch/s390/kernel/sclp.c b/kernel/arch/s390/kernel/sclp.c
new file mode 100644
index 000000000..9fe7781a4
--- /dev/null
+++ b/kernel/arch/s390/kernel/sclp.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright IBM Corp. 2015
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#include <linux/kernel.h>
+#include <asm/ebcdic.h>
+#include <asm/irq.h>
+#include <asm/lowcore.h>
+#include <asm/processor.h>
+#include <asm/sclp.h>
+
+static char _sclp_work_area[4096] __aligned(PAGE_SIZE);
+
+static void _sclp_wait_int(void)
+{
+ unsigned long cr0, cr0_new, psw_mask, addr;
+ psw_t psw_ext_save, psw_wait;
+
+ __ctl_store(cr0, 0, 0);
+ cr0_new = cr0 | 0x200;
+ __ctl_load(cr0_new, 0, 0);
+
+ psw_ext_save = S390_lowcore.external_new_psw;
+ psw_mask = __extract_psw();
+ S390_lowcore.external_new_psw.mask = psw_mask;
+ psw_wait.mask = psw_mask | PSW_MASK_EXT | PSW_MASK_WAIT;
+ S390_lowcore.ext_int_code = 0;
+
+ do {
+ asm volatile(
+ " larl %[addr],0f\n"
+ " stg %[addr],%[psw_wait_addr]\n"
+ " stg %[addr],%[psw_ext_addr]\n"
+ " lpswe %[psw_wait]\n"
+ "0:\n"
+ : [addr] "=&d" (addr),
+ [psw_wait_addr] "=Q" (psw_wait.addr),
+ [psw_ext_addr] "=Q" (S390_lowcore.external_new_psw.addr)
+ : [psw_wait] "Q" (psw_wait)
+ : "cc", "memory");
+ } while (S390_lowcore.ext_int_code != EXT_IRQ_SERVICE_SIG);
+
+ __ctl_load(cr0, 0, 0);
+ S390_lowcore.external_new_psw = psw_ext_save;
+}
+
+static int _sclp_servc(unsigned int cmd, char *sccb)
+{
+ unsigned int cc;
+
+ do {
+ asm volatile(
+ " .insn rre,0xb2200000,%1,%2\n"
+ " ipm %0\n"
+ : "=d" (cc) : "d" (cmd), "a" (sccb)
+ : "cc", "memory");
+ cc >>= 28;
+ if (cc == 3)
+ return -EINVAL;
+ _sclp_wait_int();
+ } while (cc != 0);
+ return (*(unsigned short *)(sccb + 6) == 0x20) ? 0 : -EIO;
+}
+
+static int _sclp_setup(int disable)
+{
+ static unsigned char init_sccb[] = {
+ 0x00, 0x1c,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x04,
+ 0x80, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+ };
+ unsigned int *masks;
+ int rc;
+
+ memcpy(_sclp_work_area, init_sccb, 28);
+ masks = (unsigned int *)(_sclp_work_area + 12);
+ if (disable)
+ memset(masks, 0, 16);
+ /* SCLP write mask */
+ rc = _sclp_servc(0x00780005, _sclp_work_area);
+ if (rc)
+ return rc;
+ if ((masks[0] & masks[3]) != masks[0] ||
+ (masks[1] & masks[2]) != masks[1])
+ return -EIO;
+ return 0;
+}
+
+static int _sclp_print(const char *str)
+{
+ static unsigned char write_head[] = {
+ /* sccb header */
+ 0x00, 0x52, /* 0 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 2 */
+ /* evbuf */
+ 0x00, 0x4a, /* 8 */
+ 0x02, 0x00, 0x00, 0x00, /* 10 */
+ /* mdb */
+ 0x00, 0x44, /* 14 */
+ 0x00, 0x01, /* 16 */
+ 0xd4, 0xc4, 0xc2, 0x40, /* 18 */
+ 0x00, 0x00, 0x00, 0x01, /* 22 */
+ /* go */
+ 0x00, 0x38, /* 26 */
+ 0x00, 0x01, /* 28 */
+ 0x00, 0x00, 0x00, 0x00, /* 30 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 34 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 42 */
+ 0x00, 0x00, 0x00, 0x00, /* 50 */
+ 0x00, 0x00, /* 54 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 56 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 64 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 72 */
+ 0x00, 0x00, /* 80 */
+ };
+ static unsigned char write_mto[] = {
+ /* mto */
+ 0x00, 0x0a, /* 0 */
+ 0x00, 0x04, /* 2 */
+ 0x10, 0x00, /* 4 */
+ 0x00, 0x00, 0x00, 0x00 /* 6 */
+ };
+ unsigned char *ptr, ch;
+ unsigned int count;
+
+ memcpy(_sclp_work_area, write_head, sizeof(write_head));
+ ptr = _sclp_work_area + sizeof(write_head);
+ do {
+ memcpy(ptr, write_mto, sizeof(write_mto));
+ for (count = sizeof(write_mto); (ch = *str++) != 0; count++) {
+ if (ch == 0x0a)
+ break;
+ ptr[count] = _ascebc[ch];
+ }
+ /* Update length fields in mto, mdb, evbuf and sccb */
+ *(unsigned short *) ptr = count;
+ *(unsigned short *)(_sclp_work_area + 14) += count;
+ *(unsigned short *)(_sclp_work_area + 8) += count;
+ *(unsigned short *)(_sclp_work_area + 0) += count;
+ ptr += count;
+ } while (ch != 0);
+
+ /* SCLP write data */
+ return _sclp_servc(0x00760005, _sclp_work_area);
+}
+
+int _sclp_print_early(const char *str)
+{
+ int rc;
+
+ rc = _sclp_setup(0);
+ if (rc)
+ return rc;
+ rc = _sclp_print(str);
+ if (rc)
+ return rc;
+ return _sclp_setup(1);
+}
diff --git a/kernel/arch/s390/kernel/setup.c b/kernel/arch/s390/kernel/setup.c
index 1942f22e6..c837bcacf 100644
--- a/kernel/arch/s390/kernel/setup.c
+++ b/kernel/arch/s390/kernel/setup.c
@@ -62,6 +62,7 @@
#include <asm/os_info.h>
#include <asm/sclp.h>
#include <asm/sysinfo.h>
+#include <asm/numa.h>
#include "entry.h"
/*
@@ -76,7 +77,7 @@ EXPORT_SYMBOL(console_devno);
unsigned int console_irq = -1;
EXPORT_SYMBOL(console_irq);
-unsigned long elf_hwcap = 0;
+unsigned long elf_hwcap __read_mostly = 0;
char elf_platform[ELF_PLATFORM_SIZE];
int __initdata memory_end_set;
@@ -128,9 +129,9 @@ __setup("condev=", condev_setup);
static void __init set_preferred_console(void)
{
if (MACHINE_IS_KVM) {
- if (sclp_has_vt220())
+ if (sclp.has_vt220)
add_preferred_console("ttyS", 1, NULL);
- else if (sclp_has_linemode())
+ else if (sclp.has_linemode)
add_preferred_console("ttyS", 0, NULL);
else
add_preferred_console("hvc", 0, NULL);
@@ -510,8 +511,8 @@ static void reserve_memory_end(void)
{
#ifdef CONFIG_CRASH_DUMP
if (ipl_info.type == IPL_TYPE_FCP_DUMP &&
- !OLDMEM_BASE && sclp_get_hsa_size()) {
- memory_end = sclp_get_hsa_size();
+ !OLDMEM_BASE && sclp.hsa_size) {
+ memory_end = sclp.hsa_size;
memory_end &= PAGE_MASK;
memory_end_set = 1;
}
@@ -576,7 +577,7 @@ static void __init reserve_crashkernel(void)
crash_base = low;
} else {
/* Find suitable area in free memory */
- low = max_t(unsigned long, crash_size, sclp_get_hsa_size());
+ low = max_t(unsigned long, crash_size, sclp.hsa_size);
high = crash_base ? crash_base + crash_size : ULONG_MAX;
if (crash_base && crash_base < low) {
@@ -640,19 +641,24 @@ static void __init check_initrd(void)
}
/*
- * Reserve all kernel text
+ * Reserve memory used for lowcore/command line/kernel image.
*/
static void __init reserve_kernel(void)
{
- unsigned long start_pfn;
- start_pfn = PFN_UP(__pa(&_end));
+ unsigned long start_pfn = PFN_UP(__pa(&_end));
+#ifdef CONFIG_DMA_API_DEBUG
/*
- * Reserve memory used for lowcore/command line/kernel image.
+ * DMA_API_DEBUG code stumbles over addresses from the
+ * range [_ehead, _stext]. Mark the memory as reserved
+ * so it is not used for CONFIG_DMA_API_DEBUG=y.
*/
+ memblock_reserve(0, PFN_PHYS(start_pfn));
+#else
memblock_reserve(0, (unsigned long)_ehead);
memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
- (unsigned long)_stext);
+#endif
}
static void __init reserve_elfcorehdr(void)
@@ -758,9 +764,6 @@ static int __init setup_hwcaps(void)
get_cpu_id(&cpu_id);
add_device_randomness(&cpu_id, sizeof(cpu_id));
switch (cpu_id.machine) {
- case 0x9672:
- strcpy(elf_platform, "g5");
- break;
case 0x2064:
case 0x2066:
default: /* Use "z900" as default for 64 bit kernels. */
@@ -867,12 +870,18 @@ void __init setup_arch(char **cmdline_p)
check_initrd();
reserve_crashkernel();
+ /*
+ * Be aware that smp_save_dump_cpus() triggers a system reset.
+ * Therefore CPU and device initialization should be done afterwards.
+ */
+ smp_save_dump_cpus();
setup_resources();
setup_vmcoreinfo();
setup_lowcore();
smp_fill_possible_mask();
cpu_init();
+ numa_setup();
/*
* Create kernel page tables and switch to virtual addressing.
diff --git a/kernel/arch/s390/kernel/signal.c b/kernel/arch/s390/kernel/signal.c
index c551f22ce..028cc46cb 100644
--- a/kernel/arch/s390/kernel/signal.c
+++ b/kernel/arch/s390/kernel/signal.c
@@ -105,32 +105,13 @@ struct rt_sigframe
static void store_sigregs(void)
{
save_access_regs(current->thread.acrs);
- save_fp_ctl(&current->thread.fp_regs.fpc);
- if (current->thread.vxrs) {
- int i;
-
- save_vx_regs(current->thread.vxrs);
- for (i = 0; i < __NUM_FPRS; i++)
- current->thread.fp_regs.fprs[i] =
- *(freg_t *)(current->thread.vxrs + i);
- } else
- save_fp_regs(current->thread.fp_regs.fprs);
+ save_fpu_regs();
}
/* Load registers after signal return */
static void load_sigregs(void)
{
restore_access_regs(current->thread.acrs);
- /* restore_fp_ctl is done in restore_sigregs */
- if (current->thread.vxrs) {
- int i;
-
- for (i = 0; i < __NUM_FPRS; i++)
- *(freg_t *)(current->thread.vxrs + i) =
- current->thread.fp_regs.fprs[i];
- restore_vx_regs(current->thread.vxrs);
- } else
- restore_fp_regs(current->thread.fp_regs.fprs);
}
/* Returns non-zero on fault. */
@@ -146,8 +127,7 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
memcpy(&user_sregs.regs.acrs, current->thread.acrs,
sizeof(user_sregs.regs.acrs));
- memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
- sizeof(user_sregs.fpregs));
+ fpregs_store(&user_sregs.fpregs, &current->thread.fpu);
if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs)))
return -EFAULT;
return 0;
@@ -166,8 +146,8 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI))
return -EINVAL;
- /* Loading the floating-point-control word can fail. Do that first. */
- if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+ /* Test the floating-point-control word. */
+ if (test_fp_ctl(user_sregs.fpregs.fpc))
return -EINVAL;
/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
@@ -185,8 +165,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
sizeof(current->thread.acrs));
- memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
- sizeof(current->thread.fp_regs));
+ fpregs_load(&user_sregs.fpregs, &current->thread.fpu);
clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
return 0;
@@ -200,13 +179,13 @@ static int save_sigregs_ext(struct pt_regs *regs,
int i;
/* Save vector registers to signal stack */
- if (current->thread.vxrs) {
+ if (MACHINE_HAS_VX) {
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+ vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
sizeof(sregs_ext->vxrs_low)) ||
__copy_to_user(&sregs_ext->vxrs_high,
- current->thread.vxrs + __NUM_VXRS_LOW,
+ current->thread.fpu.vxrs + __NUM_VXRS_LOW,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
}
@@ -220,15 +199,15 @@ static int restore_sigregs_ext(struct pt_regs *regs,
int i;
/* Restore vector registers from signal stack */
- if (current->thread.vxrs) {
+ if (MACHINE_HAS_VX) {
if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
sizeof(sregs_ext->vxrs_low)) ||
- __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+ __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
&sregs_ext->vxrs_high,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
for (i = 0; i < __NUM_VXRS_LOW; i++)
- *((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+ *((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
}
return 0;
}
@@ -243,6 +222,7 @@ SYSCALL_DEFINE0(sigreturn)
if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
goto badframe;
set_current_blocked(&set);
+ save_fpu_regs();
if (restore_sigregs(regs, &frame->sregs))
goto badframe;
if (restore_sigregs_ext(regs, &frame->sregs_ext))
@@ -266,6 +246,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
set_current_blocked(&set);
if (restore_altstack(&frame->uc.uc_stack))
goto badframe;
+ save_fpu_regs();
if (restore_sigregs(regs, &frame->uc.uc_mcontext))
goto badframe;
if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
@@ -400,8 +381,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
uc_flags = 0;
if (MACHINE_HAS_VX) {
frame_size += sizeof(_sigregs_ext);
- if (current->thread.vxrs)
- uc_flags |= UC_VXRS;
+ uc_flags |= UC_VXRS;
}
frame = get_sigframe(&ksig->ka, regs, frame_size);
if (frame == (void __user *) -1UL)
diff --git a/kernel/arch/s390/kernel/smp.c b/kernel/arch/s390/kernel/smp.c
index efd2c1968..9062df575 100644
--- a/kernel/arch/s390/kernel/smp.c
+++ b/kernel/arch/s390/kernel/smp.c
@@ -31,7 +31,9 @@
#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/crash_dump.h>
+#include <linux/memblock.h>
#include <asm/asm-offsets.h>
+#include <asm/diag.h>
#include <asm/switch_to.h>
#include <asm/facility.h>
#include <asm/ipl.h>
@@ -69,7 +71,7 @@ struct pcpu {
u16 address; /* physical cpu address */
};
-static u8 boot_cpu_type;
+static u8 boot_core_type;
static struct pcpu pcpu_devices[NR_CPUS];
unsigned int smp_cpu_mt_shift;
@@ -260,6 +262,8 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->thread_info = (unsigned long) task_thread_info(tsk);
lc->current_task = (unsigned long) tsk;
+ lc->lpp = LPP_MAGIC;
+ lc->current_pid = tsk->pid;
lc->user_timer = ti->user_timer;
lc->system_timer = ti->system_timer;
lc->steal_timer = 0;
@@ -374,11 +378,14 @@ int smp_vcpu_scheduled(int cpu)
void smp_yield_cpu(int cpu)
{
- if (MACHINE_HAS_DIAG9C)
+ if (MACHINE_HAS_DIAG9C) {
+ diag_stat_inc_norecursion(DIAG_STAT_X09C);
asm volatile("diag %0,0,0x9c"
: : "d" (pcpu_devices[cpu].address));
- else if (MACHINE_HAS_DIAG44)
+ } else if (MACHINE_HAS_DIAG44) {
+ diag_stat_inc_norecursion(DIAG_STAT_X044);
asm volatile("diag 0,0,0x44");
+ }
}
/*
@@ -531,15 +538,12 @@ EXPORT_SYMBOL(smp_ctl_clear_bit);
#ifdef CONFIG_CRASH_DUMP
-static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu)
+static void __init __smp_store_cpu_state(struct save_area_ext *sa_ext,
+ u16 address, int is_boot_cpu)
{
- void *lc = pcpu_devices[0].lowcore;
- struct save_area_ext *sa_ext;
+ void *lc = (void *)(unsigned long) store_prefix();
unsigned long vx_sa;
- sa_ext = dump_save_area_create(cpu);
- if (!sa_ext)
- panic("could not allocate memory for save area\n");
if (is_boot_cpu) {
/* Copy the registers of the boot CPU. */
copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa),
@@ -554,14 +558,33 @@ static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu)
if (!MACHINE_HAS_VX)
return;
/* Get the VX registers */
- vx_sa = __get_free_page(GFP_KERNEL);
+ vx_sa = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
if (!vx_sa)
panic("could not allocate memory for VX save area\n");
__pcpu_sigp_relax(address, SIGP_STORE_ADDITIONAL_STATUS, vx_sa, NULL);
memcpy(sa_ext->vx_regs, (void *) vx_sa, sizeof(sa_ext->vx_regs));
- free_page(vx_sa);
+ memblock_free(vx_sa, PAGE_SIZE);
+}
+
+int smp_store_status(int cpu)
+{
+ unsigned long vx_sa;
+ struct pcpu *pcpu;
+
+ pcpu = pcpu_devices + cpu;
+ if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS,
+ 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED)
+ return -EIO;
+ if (!MACHINE_HAS_VX)
+ return 0;
+ vx_sa = __pa(pcpu->lowcore->vector_save_area_addr);
+ __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
+ vx_sa, NULL);
+ return 0;
}
+#endif /* CONFIG_CRASH_DUMP */
+
/*
* Collect CPU state of the previous, crashed system.
* There are four cases:
@@ -589,10 +612,12 @@ static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu)
* old system. The ELF sections are picked up by the crash_dump code
* via elfcorehdr_addr.
*/
-static void __init smp_store_cpu_states(struct sclp_cpu_info *info)
+void __init smp_save_dump_cpus(void)
{
- unsigned int cpu, address, i, j;
- int is_boot_cpu;
+#ifdef CONFIG_CRASH_DUMP
+ int addr, cpu, boot_cpu_addr, max_cpu_addr;
+ struct save_area_ext *sa_ext;
+ bool is_boot_cpu;
if (is_kdump_kernel())
/* Previous system stored the CPU states. Nothing to do. */
@@ -601,43 +626,37 @@ static void __init smp_store_cpu_states(struct sclp_cpu_info *info)
/* No previous system present, normal boot. */
return;
/* Set multi-threading state to the previous system. */
- pcpu_set_smt(sclp_get_mtid_prev());
- /* Collect CPU states. */
- cpu = 0;
- for (i = 0; i < info->configured; i++) {
- /* Skip CPUs with different CPU type. */
- if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
+ pcpu_set_smt(sclp.mtid_prev);
+ max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev;
+ for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) {
+ if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0, NULL) ==
+ SIGP_CC_NOT_OPERATIONAL)
continue;
- for (j = 0; j <= smp_cpu_mtid; j++, cpu++) {
- address = (info->cpu[i].core_id << smp_cpu_mt_shift) + j;
- is_boot_cpu = (address == pcpu_devices[0].address);
- if (is_boot_cpu && !OLDMEM_BASE)
- /* Skip boot CPU for standard zfcp dump. */
- continue;
- /* Get state for this CPu. */
- __smp_store_cpu_state(cpu, address, is_boot_cpu);
- }
+ cpu += 1;
}
-}
-
-int smp_store_status(int cpu)
-{
- unsigned long vx_sa;
- struct pcpu *pcpu;
-
- pcpu = pcpu_devices + cpu;
- if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS,
- 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED)
- return -EIO;
- if (!MACHINE_HAS_VX)
- return 0;
- vx_sa = __pa(pcpu->lowcore->vector_save_area_addr);
- __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
- vx_sa, NULL);
- return 0;
-}
-
+ dump_save_areas.areas = (void *)memblock_alloc(sizeof(void *) * cpu, 8);
+ dump_save_areas.count = cpu;
+ boot_cpu_addr = stap();
+ for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) {
+ if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0, NULL) ==
+ SIGP_CC_NOT_OPERATIONAL)
+ continue;
+ sa_ext = (void *) memblock_alloc(sizeof(*sa_ext), 8);
+ dump_save_areas.areas[cpu] = sa_ext;
+ if (!sa_ext)
+ panic("could not allocate memory for save area\n");
+ is_boot_cpu = (addr == boot_cpu_addr);
+ cpu += 1;
+ if (is_boot_cpu && !OLDMEM_BASE)
+ /* Skip boot CPU for standard zfcp dump. */
+ continue;
+ /* Get state for this CPU. */
+ __smp_store_cpu_state(sa_ext, addr, is_boot_cpu);
+ }
+ diag308_reset();
+ pcpu_set_smt(0);
#endif /* CONFIG_CRASH_DUMP */
+}
void smp_cpu_set_polarization(int cpu, int val)
{
@@ -649,21 +668,22 @@ int smp_cpu_get_polarization(int cpu)
return pcpu_devices[cpu].polarization;
}
-static struct sclp_cpu_info *smp_get_cpu_info(void)
+static struct sclp_core_info *smp_get_core_info(void)
{
static int use_sigp_detection;
- struct sclp_cpu_info *info;
+ struct sclp_core_info *info;
int address;
info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (info && (use_sigp_detection || sclp_get_cpu_info(info))) {
+ if (info && (use_sigp_detection || sclp_get_core_info(info))) {
use_sigp_detection = 1;
- for (address = 0; address <= MAX_CPU_ADDRESS;
+ for (address = 0;
+ address < (SCLP_MAX_CORES << smp_cpu_mt_shift);
address += (1U << smp_cpu_mt_shift)) {
if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) ==
SIGP_CC_NOT_OPERATIONAL)
continue;
- info->cpu[info->configured].core_id =
+ info->core[info->configured].core_id =
address >> smp_cpu_mt_shift;
info->configured++;
}
@@ -674,7 +694,7 @@ static struct sclp_cpu_info *smp_get_cpu_info(void)
static int smp_add_present_cpu(int cpu);
-static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
+static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add)
{
struct pcpu *pcpu;
cpumask_t avail;
@@ -685,9 +705,9 @@ static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
cpu = cpumask_first(&avail);
for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
- if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
+ if (sclp.has_core_type && info->core[i].type != boot_core_type)
continue;
- address = info->cpu[i].core_id << smp_cpu_mt_shift;
+ address = info->core[i].core_id << smp_cpu_mt_shift;
for (j = 0; j <= smp_cpu_mtid; j++) {
if (pcpu_find_address(cpu_present_mask, address + j))
continue;
@@ -713,41 +733,37 @@ static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
static void __init smp_detect_cpus(void)
{
unsigned int cpu, mtid, c_cpus, s_cpus;
- struct sclp_cpu_info *info;
+ struct sclp_core_info *info;
u16 address;
/* Get CPU information */
- info = smp_get_cpu_info();
+ info = smp_get_core_info();
if (!info)
panic("smp_detect_cpus failed to allocate memory\n");
/* Find boot CPU type */
- if (info->has_cpu_type) {
+ if (sclp.has_core_type) {
address = stap();
for (cpu = 0; cpu < info->combined; cpu++)
- if (info->cpu[cpu].core_id == address) {
+ if (info->core[cpu].core_id == address) {
/* The boot cpu dictates the cpu type. */
- boot_cpu_type = info->cpu[cpu].type;
+ boot_core_type = info->core[cpu].type;
break;
}
if (cpu >= info->combined)
panic("Could not find boot CPU type");
}
-#ifdef CONFIG_CRASH_DUMP
- /* Collect CPU state of previous system */
- smp_store_cpu_states(info);
-#endif
-
/* Set multi-threading state for the current system */
- mtid = sclp_get_mtid(boot_cpu_type);
+ mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp;
mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
pcpu_set_smt(mtid);
/* Print number of CPUs */
c_cpus = s_cpus = 0;
for (cpu = 0; cpu < info->combined; cpu++) {
- if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type)
+ if (sclp.has_core_type &&
+ info->core[cpu].type != boot_core_type)
continue;
if (cpu < info->configured)
c_cpus += smp_cpu_mtid + 1;
@@ -880,12 +896,13 @@ void __noreturn cpu_die(void)
void __init smp_fill_possible_mask(void)
{
- unsigned int possible, sclp, cpu;
+ unsigned int possible, sclp_max, cpu;
- sclp = min(smp_max_threads, sclp_get_mtid_max() + 1);
- sclp = sclp_get_max_cpu()*sclp ?: nr_cpu_ids;
+ sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1;
+ sclp_max = min(smp_max_threads, sclp_max);
+ sclp_max = sclp.max_cores * sclp_max ?: nr_cpu_ids;
possible = setup_possible_cpus ?: nr_cpu_ids;
- possible = min(possible, sclp);
+ possible = min(possible, sclp_max);
for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
set_cpu_possible(cpu, true);
}
@@ -976,7 +993,7 @@ static ssize_t cpu_configure_store(struct device *dev,
case 0:
if (pcpu->state != CPU_STATE_CONFIGURED)
break;
- rc = sclp_cpu_deconfigure(pcpu->address >> smp_cpu_mt_shift);
+ rc = sclp_core_deconfigure(pcpu->address >> smp_cpu_mt_shift);
if (rc)
break;
for (i = 0; i <= smp_cpu_mtid; i++) {
@@ -991,7 +1008,7 @@ static ssize_t cpu_configure_store(struct device *dev,
case 1:
if (pcpu->state != CPU_STATE_STANDBY)
break;
- rc = sclp_cpu_configure(pcpu->address >> smp_cpu_mt_shift);
+ rc = sclp_core_configure(pcpu->address >> smp_cpu_mt_shift);
if (rc)
break;
for (i = 0; i <= smp_cpu_mtid; i++) {
@@ -1106,10 +1123,10 @@ out:
int __ref smp_rescan_cpus(void)
{
- struct sclp_cpu_info *info;
+ struct sclp_core_info *info;
int nr;
- info = smp_get_cpu_info();
+ info = smp_get_core_info();
if (!info)
return -ENOMEM;
get_online_cpus();
diff --git a/kernel/arch/s390/kernel/suspend.c b/kernel/arch/s390/kernel/suspend.c
index d3236c9e2..39e2f41b6 100644
--- a/kernel/arch/s390/kernel/suspend.c
+++ b/kernel/arch/s390/kernel/suspend.c
@@ -9,10 +9,10 @@
#include <linux/pfn.h>
#include <linux/suspend.h>
#include <linux/mm.h>
+#include <linux/pci.h>
#include <asm/ctl_reg.h>
#include <asm/ipl.h>
#include <asm/cio.h>
-#include <asm/pci.h>
#include <asm/sections.h>
#include "entry.h"
diff --git a/kernel/arch/s390/kernel/swsusp.S b/kernel/arch/s390/kernel/swsusp.S
index ca6294645..2d6b6e81f 100644
--- a/kernel/arch/s390/kernel/swsusp.S
+++ b/kernel/arch/s390/kernel/swsusp.S
@@ -30,6 +30,9 @@ ENTRY(swsusp_arch_suspend)
aghi %r15,-STACK_FRAME_OVERHEAD
stg %r1,__SF_BACKCHAIN(%r15)
+ /* Store FPU registers */
+ brasl %r14,save_fpu_regs
+
/* Deactivate DAT */
stnsm __SF_EMPTY(%r15),0xfb
@@ -47,23 +50,6 @@ ENTRY(swsusp_arch_suspend)
/* Store registers */
mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */
- stfpc 0x31c(%r1) /* store fpu control */
- std 0,0x200(%r1) /* store f0 */
- std 1,0x208(%r1) /* store f1 */
- std 2,0x210(%r1) /* store f2 */
- std 3,0x218(%r1) /* store f3 */
- std 4,0x220(%r1) /* store f4 */
- std 5,0x228(%r1) /* store f5 */
- std 6,0x230(%r1) /* store f6 */
- std 7,0x238(%r1) /* store f7 */
- std 8,0x240(%r1) /* store f8 */
- std 9,0x248(%r1) /* store f9 */
- std 10,0x250(%r1) /* store f10 */
- std 11,0x258(%r1) /* store f11 */
- std 12,0x260(%r1) /* store f12 */
- std 13,0x268(%r1) /* store f13 */
- std 14,0x270(%r1) /* store f14 */
- std 15,0x278(%r1) /* store f15 */
stam %a0,%a15,0x340(%r1) /* store access registers */
stctg %c0,%c15,0x380(%r1) /* store control registers */
stmg %r0,%r15,0x280(%r1) /* store general registers */
@@ -249,24 +235,6 @@ restore_registers:
lctlg %c0,%c15,0x380(%r13) /* load control registers */
lam %a0,%a15,0x340(%r13) /* load access registers */
- lfpc 0x31c(%r13) /* load fpu control */
- ld 0,0x200(%r13) /* load f0 */
- ld 1,0x208(%r13) /* load f1 */
- ld 2,0x210(%r13) /* load f2 */
- ld 3,0x218(%r13) /* load f3 */
- ld 4,0x220(%r13) /* load f4 */
- ld 5,0x228(%r13) /* load f5 */
- ld 6,0x230(%r13) /* load f6 */
- ld 7,0x238(%r13) /* load f7 */
- ld 8,0x240(%r13) /* load f8 */
- ld 9,0x248(%r13) /* load f9 */
- ld 10,0x250(%r13) /* load f10 */
- ld 11,0x258(%r13) /* load f11 */
- ld 12,0x260(%r13) /* load f12 */
- ld 13,0x268(%r13) /* load f13 */
- ld 14,0x270(%r13) /* load f14 */
- ld 15,0x278(%r13) /* load f15 */
-
/* Load old stack */
lg %r15,0x2f8(%r13)
diff --git a/kernel/arch/s390/kernel/syscalls.S b/kernel/arch/s390/kernel/syscalls.S
index 1acad0268..5378c3ea1 100644
--- a/kernel/arch/s390/kernel/syscalls.S
+++ b/kernel/arch/s390/kernel/syscalls.S
@@ -9,12 +9,12 @@
#define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall)
NI_SYSCALL /* 0 */
-SYSCALL(sys_exit,compat_sys_exit)
+SYSCALL(sys_exit,sys_exit)
SYSCALL(sys_fork,sys_fork)
SYSCALL(sys_read,compat_sys_s390_read)
SYSCALL(sys_write,compat_sys_s390_write)
SYSCALL(sys_open,compat_sys_open) /* 5 */
-SYSCALL(sys_close,compat_sys_close)
+SYSCALL(sys_close,sys_close)
SYSCALL(sys_restart_syscall,sys_restart_syscall)
SYSCALL(sys_creat,compat_sys_creat)
SYSCALL(sys_link,compat_sys_link)
@@ -35,21 +35,21 @@ SYSCALL(sys_ni_syscall,compat_sys_s390_setuid16) /* old setuid16 syscall*/
SYSCALL(sys_ni_syscall,compat_sys_s390_getuid16) /* old getuid16 syscall*/
SYSCALL(sys_ni_syscall,compat_sys_stime) /* 25 old stime syscall */
SYSCALL(sys_ptrace,compat_sys_ptrace)
-SYSCALL(sys_alarm,compat_sys_alarm)
+SYSCALL(sys_alarm,sys_alarm)
NI_SYSCALL /* old fstat syscall */
SYSCALL(sys_pause,sys_pause)
SYSCALL(sys_utime,compat_sys_utime) /* 30 */
NI_SYSCALL /* old stty syscall */
NI_SYSCALL /* old gtty syscall */
SYSCALL(sys_access,compat_sys_access)
-SYSCALL(sys_nice,compat_sys_nice)
+SYSCALL(sys_nice,sys_nice)
NI_SYSCALL /* 35 old ftime syscall */
SYSCALL(sys_sync,sys_sync)
-SYSCALL(sys_kill,compat_sys_kill)
+SYSCALL(sys_kill,sys_kill)
SYSCALL(sys_rename,compat_sys_rename)
SYSCALL(sys_mkdir,compat_sys_mkdir)
SYSCALL(sys_rmdir,compat_sys_rmdir) /* 40 */
-SYSCALL(sys_dup,compat_sys_dup)
+SYSCALL(sys_dup,sys_dup)
SYSCALL(sys_pipe,compat_sys_pipe)
SYSCALL(sys_times,compat_sys_times)
NI_SYSCALL /* old prof syscall */
@@ -65,13 +65,13 @@ NI_SYSCALL /* old lock syscall */
SYSCALL(sys_ioctl,compat_sys_ioctl)
SYSCALL(sys_fcntl,compat_sys_fcntl) /* 55 */
NI_SYSCALL /* intel mpx syscall */
-SYSCALL(sys_setpgid,compat_sys_setpgid)
+SYSCALL(sys_setpgid,sys_setpgid)
NI_SYSCALL /* old ulimit syscall */
NI_SYSCALL /* old uname syscall */
-SYSCALL(sys_umask,compat_sys_umask) /* 60 */
+SYSCALL(sys_umask,sys_umask) /* 60 */
SYSCALL(sys_chroot,compat_sys_chroot)
SYSCALL(sys_ustat,compat_sys_ustat)
-SYSCALL(sys_dup2,compat_sys_dup2)
+SYSCALL(sys_dup2,sys_dup2)
SYSCALL(sys_getppid,sys_getppid)
SYSCALL(sys_getpgrp,sys_getpgrp) /* 65 */
SYSCALL(sys_setsid,sys_setsid)
@@ -102,10 +102,10 @@ SYSCALL(sys_old_mmap,compat_sys_s390_old_mmap) /* 90 */
SYSCALL(sys_munmap,compat_sys_munmap)
SYSCALL(sys_truncate,compat_sys_truncate)
SYSCALL(sys_ftruncate,compat_sys_ftruncate)
-SYSCALL(sys_fchmod,compat_sys_fchmod)
+SYSCALL(sys_fchmod,sys_fchmod)
SYSCALL(sys_ni_syscall,compat_sys_s390_fchown16) /* 95 old fchown16 syscall*/
-SYSCALL(sys_getpriority,compat_sys_getpriority)
-SYSCALL(sys_setpriority,compat_sys_setpriority)
+SYSCALL(sys_getpriority,sys_getpriority)
+SYSCALL(sys_setpriority,sys_setpriority)
NI_SYSCALL /* old profil syscall */
SYSCALL(sys_statfs,compat_sys_statfs)
SYSCALL(sys_fstatfs,compat_sys_fstatfs) /* 100 */
@@ -126,7 +126,7 @@ SYSCALL(sys_wait4,compat_sys_wait4)
SYSCALL(sys_swapoff,compat_sys_swapoff) /* 115 */
SYSCALL(sys_sysinfo,compat_sys_sysinfo)
SYSCALL(sys_s390_ipc,compat_sys_s390_ipc)
-SYSCALL(sys_fsync,compat_sys_fsync)
+SYSCALL(sys_fsync,sys_fsync)
SYSCALL(sys_sigreturn,compat_sys_sigreturn)
SYSCALL(sys_clone,compat_sys_clone) /* 120 */
SYSCALL(sys_setdomainname,compat_sys_setdomainname)
@@ -140,35 +140,35 @@ SYSCALL(sys_init_module,compat_sys_init_module)
SYSCALL(sys_delete_module,compat_sys_delete_module)
NI_SYSCALL /* 130: old get_kernel_syms */
SYSCALL(sys_quotactl,compat_sys_quotactl)
-SYSCALL(sys_getpgid,compat_sys_getpgid)
-SYSCALL(sys_fchdir,compat_sys_fchdir)
+SYSCALL(sys_getpgid,sys_getpgid)
+SYSCALL(sys_fchdir,sys_fchdir)
SYSCALL(sys_bdflush,compat_sys_bdflush)
SYSCALL(sys_sysfs,compat_sys_sysfs) /* 135 */
-SYSCALL(sys_s390_personality,compat_sys_s390_personality)
+SYSCALL(sys_s390_personality,sys_s390_personality)
NI_SYSCALL /* for afs_syscall */
SYSCALL(sys_ni_syscall,compat_sys_s390_setfsuid16) /* old setfsuid16 syscall */
SYSCALL(sys_ni_syscall,compat_sys_s390_setfsgid16) /* old setfsgid16 syscall */
SYSCALL(sys_llseek,compat_sys_llseek) /* 140 */
SYSCALL(sys_getdents,compat_sys_getdents)
SYSCALL(sys_select,compat_sys_select)
-SYSCALL(sys_flock,compat_sys_flock)
+SYSCALL(sys_flock,sys_flock)
SYSCALL(sys_msync,compat_sys_msync)
SYSCALL(sys_readv,compat_sys_readv) /* 145 */
SYSCALL(sys_writev,compat_sys_writev)
-SYSCALL(sys_getsid,compat_sys_getsid)
-SYSCALL(sys_fdatasync,compat_sys_fdatasync)
+SYSCALL(sys_getsid,sys_getsid)
+SYSCALL(sys_fdatasync,sys_fdatasync)
SYSCALL(sys_sysctl,compat_sys_sysctl)
SYSCALL(sys_mlock,compat_sys_mlock) /* 150 */
SYSCALL(sys_munlock,compat_sys_munlock)
-SYSCALL(sys_mlockall,compat_sys_mlockall)
+SYSCALL(sys_mlockall,sys_mlockall)
SYSCALL(sys_munlockall,sys_munlockall)
SYSCALL(sys_sched_setparam,compat_sys_sched_setparam)
SYSCALL(sys_sched_getparam,compat_sys_sched_getparam) /* 155 */
SYSCALL(sys_sched_setscheduler,compat_sys_sched_setscheduler)
-SYSCALL(sys_sched_getscheduler,compat_sys_sched_getscheduler)
+SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler)
SYSCALL(sys_sched_yield,sys_sched_yield)
-SYSCALL(sys_sched_get_priority_max,compat_sys_sched_get_priority_max)
-SYSCALL(sys_sched_get_priority_min,compat_sys_sched_get_priority_min) /* 160 */
+SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max)
+SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min) /* 160 */
SYSCALL(sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval)
SYSCALL(sys_nanosleep,compat_sys_nanosleep)
SYSCALL(sys_mremap,compat_sys_mremap)
@@ -211,20 +211,20 @@ SYSCALL(sys_getuid,sys_getuid)
SYSCALL(sys_getgid,sys_getgid) /* 200 */
SYSCALL(sys_geteuid,sys_geteuid)
SYSCALL(sys_getegid,sys_getegid)
-SYSCALL(sys_setreuid,compat_sys_setreuid)
-SYSCALL(sys_setregid,compat_sys_setregid)
+SYSCALL(sys_setreuid,sys_setreuid)
+SYSCALL(sys_setregid,sys_setregid)
SYSCALL(sys_getgroups,compat_sys_getgroups) /* 205 */
SYSCALL(sys_setgroups,compat_sys_setgroups)
-SYSCALL(sys_fchown,compat_sys_fchown)
-SYSCALL(sys_setresuid,compat_sys_setresuid)
+SYSCALL(sys_fchown,sys_fchown)
+SYSCALL(sys_setresuid,sys_setresuid)
SYSCALL(sys_getresuid,compat_sys_getresuid)
-SYSCALL(sys_setresgid,compat_sys_setresgid) /* 210 */
+SYSCALL(sys_setresgid,sys_setresgid) /* 210 */
SYSCALL(sys_getresgid,compat_sys_getresgid)
SYSCALL(sys_chown,compat_sys_chown)
-SYSCALL(sys_setuid,compat_sys_setuid)
-SYSCALL(sys_setgid,compat_sys_setgid)
-SYSCALL(sys_setfsuid,compat_sys_setfsuid) /* 215 */
-SYSCALL(sys_setfsgid,compat_sys_setfsgid)
+SYSCALL(sys_setuid,sys_setuid)
+SYSCALL(sys_setgid,sys_setgid)
+SYSCALL(sys_setfsuid,sys_setfsuid) /* 215 */
+SYSCALL(sys_setfsgid,sys_setfsgid)
SYSCALL(sys_pivot_root,compat_sys_pivot_root)
SYSCALL(sys_mincore,compat_sys_mincore)
SYSCALL(sys_madvise,compat_sys_madvise)
@@ -245,19 +245,19 @@ SYSCALL(sys_removexattr,compat_sys_removexattr)
SYSCALL(sys_lremovexattr,compat_sys_lremovexattr)
SYSCALL(sys_fremovexattr,compat_sys_fremovexattr) /* 235 */
SYSCALL(sys_gettid,sys_gettid)
-SYSCALL(sys_tkill,compat_sys_tkill)
+SYSCALL(sys_tkill,sys_tkill)
SYSCALL(sys_futex,compat_sys_futex)
SYSCALL(sys_sched_setaffinity,compat_sys_sched_setaffinity)
SYSCALL(sys_sched_getaffinity,compat_sys_sched_getaffinity) /* 240 */
-SYSCALL(sys_tgkill,compat_sys_tgkill)
+SYSCALL(sys_tgkill,sys_tgkill)
NI_SYSCALL /* reserved for TUX */
SYSCALL(sys_io_setup,compat_sys_io_setup)
SYSCALL(sys_io_destroy,compat_sys_io_destroy)
SYSCALL(sys_io_getevents,compat_sys_io_getevents) /* 245 */
SYSCALL(sys_io_submit,compat_sys_io_submit)
SYSCALL(sys_io_cancel,compat_sys_io_cancel)
-SYSCALL(sys_exit_group,compat_sys_exit_group)
-SYSCALL(sys_epoll_create,compat_sys_epoll_create)
+SYSCALL(sys_exit_group,sys_exit_group)
+SYSCALL(sys_epoll_create,sys_epoll_create)
SYSCALL(sys_epoll_ctl,compat_sys_epoll_ctl) /* 250 */
SYSCALL(sys_epoll_wait,compat_sys_epoll_wait)
SYSCALL(sys_set_tid_address,compat_sys_set_tid_address)
@@ -265,8 +265,8 @@ SYSCALL(sys_fadvise64_64,compat_sys_s390_fadvise64)
SYSCALL(sys_timer_create,compat_sys_timer_create)
SYSCALL(sys_timer_settime,compat_sys_timer_settime) /* 255 */
SYSCALL(sys_timer_gettime,compat_sys_timer_gettime)
-SYSCALL(sys_timer_getoverrun,compat_sys_timer_getoverrun)
-SYSCALL(sys_timer_delete,compat_sys_timer_delete)
+SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun)
+SYSCALL(sys_timer_delete,sys_timer_delete)
SYSCALL(sys_clock_settime,compat_sys_clock_settime)
SYSCALL(sys_clock_gettime,compat_sys_clock_gettime) /* 260 */
SYSCALL(sys_clock_getres,compat_sys_clock_getres)
@@ -276,9 +276,9 @@ SYSCALL(sys_ni_syscall,compat_sys_s390_fadvise64_64)
SYSCALL(sys_statfs64,compat_sys_statfs64)
SYSCALL(sys_fstatfs64,compat_sys_fstatfs64)
SYSCALL(sys_remap_file_pages,compat_sys_remap_file_pages)
-NI_SYSCALL /* 268 sys_mbind */
-NI_SYSCALL /* 269 sys_get_mempolicy */
-NI_SYSCALL /* 270 sys_set_mempolicy */
+SYSCALL(sys_mbind,compat_sys_mbind)
+SYSCALL(sys_get_mempolicy,compat_sys_get_mempolicy)
+SYSCALL(sys_set_mempolicy,compat_sys_set_mempolicy)
SYSCALL(sys_mq_open,compat_sys_mq_open)
SYSCALL(sys_mq_unlink,compat_sys_mq_unlink)
SYSCALL(sys_mq_timedsend,compat_sys_mq_timedsend)
@@ -290,12 +290,12 @@ SYSCALL(sys_add_key,compat_sys_add_key)
SYSCALL(sys_request_key,compat_sys_request_key)
SYSCALL(sys_keyctl,compat_sys_keyctl) /* 280 */
SYSCALL(sys_waitid,compat_sys_waitid)
-SYSCALL(sys_ioprio_set,compat_sys_ioprio_set)
-SYSCALL(sys_ioprio_get,compat_sys_ioprio_get)
+SYSCALL(sys_ioprio_set,sys_ioprio_set)
+SYSCALL(sys_ioprio_get,sys_ioprio_get)
SYSCALL(sys_inotify_init,sys_inotify_init)
SYSCALL(sys_inotify_add_watch,compat_sys_inotify_add_watch) /* 285 */
-SYSCALL(sys_inotify_rm_watch,compat_sys_inotify_rm_watch)
-NI_SYSCALL /* 287 sys_migrate_pages */
+SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch)
+SYSCALL(sys_migrate_pages,compat_sys_migrate_pages)
SYSCALL(sys_openat,compat_sys_openat)
SYSCALL(sys_mkdirat,compat_sys_mkdirat)
SYSCALL(sys_mknodat,compat_sys_mknodat) /* 290 */
@@ -318,7 +318,7 @@ SYSCALL(sys_splice,compat_sys_splice)
SYSCALL(sys_sync_file_range,compat_sys_s390_sync_file_range)
SYSCALL(sys_tee,compat_sys_tee)
SYSCALL(sys_vmsplice,compat_sys_vmsplice)
-NI_SYSCALL /* 310 sys_move_pages */
+SYSCALL(sys_move_pages,compat_sys_move_pages)
SYSCALL(sys_getcpu,compat_sys_getcpu)
SYSCALL(sys_epoll_pwait,compat_sys_epoll_pwait)
SYSCALL(sys_utimes,compat_sys_utimes)
@@ -326,31 +326,31 @@ SYSCALL(sys_fallocate,compat_sys_s390_fallocate)
SYSCALL(sys_utimensat,compat_sys_utimensat) /* 315 */
SYSCALL(sys_signalfd,compat_sys_signalfd)
NI_SYSCALL /* 317 old sys_timer_fd */
-SYSCALL(sys_eventfd,compat_sys_eventfd)
-SYSCALL(sys_timerfd_create,compat_sys_timerfd_create)
+SYSCALL(sys_eventfd,sys_eventfd)
+SYSCALL(sys_timerfd_create,sys_timerfd_create)
SYSCALL(sys_timerfd_settime,compat_sys_timerfd_settime) /* 320 */
SYSCALL(sys_timerfd_gettime,compat_sys_timerfd_gettime)
SYSCALL(sys_signalfd4,compat_sys_signalfd4)
-SYSCALL(sys_eventfd2,compat_sys_eventfd2)
-SYSCALL(sys_inotify_init1,compat_sys_inotify_init1)
+SYSCALL(sys_eventfd2,sys_eventfd2)
+SYSCALL(sys_inotify_init1,sys_inotify_init1)
SYSCALL(sys_pipe2,compat_sys_pipe2) /* 325 */
-SYSCALL(sys_dup3,compat_sys_dup3)
-SYSCALL(sys_epoll_create1,compat_sys_epoll_create1)
+SYSCALL(sys_dup3,sys_dup3)
+SYSCALL(sys_epoll_create1,sys_epoll_create1)
SYSCALL(sys_preadv,compat_sys_preadv)
SYSCALL(sys_pwritev,compat_sys_pwritev)
SYSCALL(sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo) /* 330 */
SYSCALL(sys_perf_event_open,compat_sys_perf_event_open)
-SYSCALL(sys_fanotify_init,compat_sys_fanotify_init)
+SYSCALL(sys_fanotify_init,sys_fanotify_init)
SYSCALL(sys_fanotify_mark,compat_sys_fanotify_mark)
SYSCALL(sys_prlimit64,compat_sys_prlimit64)
SYSCALL(sys_name_to_handle_at,compat_sys_name_to_handle_at) /* 335 */
SYSCALL(sys_open_by_handle_at,compat_sys_open_by_handle_at)
SYSCALL(sys_clock_adjtime,compat_sys_clock_adjtime)
-SYSCALL(sys_syncfs,compat_sys_syncfs)
-SYSCALL(sys_setns,compat_sys_setns)
+SYSCALL(sys_syncfs,sys_syncfs)
+SYSCALL(sys_setns,sys_setns)
SYSCALL(sys_process_vm_readv,compat_sys_process_vm_readv) /* 340 */
SYSCALL(sys_process_vm_writev,compat_sys_process_vm_writev)
-SYSCALL(sys_s390_runtime_instr,compat_sys_s390_runtime_instr)
+SYSCALL(sys_s390_runtime_instr,sys_s390_runtime_instr)
SYSCALL(sys_kcmp,compat_sys_kcmp)
SYSCALL(sys_finit_module,compat_sys_finit_module)
SYSCALL(sys_sched_setattr,compat_sys_sched_setattr) /* 345 */
@@ -363,3 +363,23 @@ SYSCALL(sys_bpf,compat_sys_bpf)
SYSCALL(sys_s390_pci_mmio_write,compat_sys_s390_pci_mmio_write)
SYSCALL(sys_s390_pci_mmio_read,compat_sys_s390_pci_mmio_read)
SYSCALL(sys_execveat,compat_sys_execveat)
+SYSCALL(sys_userfaultfd,sys_userfaultfd) /* 355 */
+SYSCALL(sys_membarrier,sys_membarrier)
+SYSCALL(sys_recvmmsg,compat_sys_recvmmsg)
+SYSCALL(sys_sendmmsg,compat_sys_sendmmsg)
+SYSCALL(sys_socket,sys_socket)
+SYSCALL(sys_socketpair,compat_sys_socketpair) /* 360 */
+SYSCALL(sys_bind,sys_bind)
+SYSCALL(sys_connect,sys_connect)
+SYSCALL(sys_listen,sys_listen)
+SYSCALL(sys_accept4,sys_accept4)
+SYSCALL(sys_getsockopt,compat_sys_getsockopt) /* 365 */
+SYSCALL(sys_setsockopt,compat_sys_setsockopt)
+SYSCALL(sys_getsockname,compat_sys_getsockname)
+SYSCALL(sys_getpeername,compat_sys_getpeername)
+SYSCALL(sys_sendto,compat_sys_sendto)
+SYSCALL(sys_sendmsg,compat_sys_sendmsg) /* 370 */
+SYSCALL(sys_recvfrom,compat_sys_recvfrom)
+SYSCALL(sys_recvmsg,compat_sys_recvmsg)
+SYSCALL(sys_shutdown,sys_shutdown)
+SYSCALL(sys_mlock2,compat_sys_mlock2)
diff --git a/kernel/arch/s390/kernel/time.c b/kernel/arch/s390/kernel/time.c
index 170ddd201..99f84ac31 100644
--- a/kernel/arch/s390/kernel/time.c
+++ b/kernel/arch/s390/kernel/time.c
@@ -58,6 +58,9 @@ EXPORT_SYMBOL_GPL(sched_clock_base_cc);
static DEFINE_PER_CPU(struct clock_event_device, comparators);
+ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier);
+EXPORT_SYMBOL(s390_epoch_delta_notifier);
+
/*
* Scheduler clock - returns current time in nanosec units.
*/
@@ -76,7 +79,7 @@ unsigned long long monotonic_clock(void)
}
EXPORT_SYMBOL(monotonic_clock);
-void tod_to_timeval(__u64 todval, struct timespec *xt)
+void tod_to_timeval(__u64 todval, struct timespec64 *xt)
{
unsigned long long sec;
@@ -117,11 +120,6 @@ static int s390_next_event(unsigned long delta,
return 0;
}
-static void s390_set_mode(enum clock_event_mode mode,
- struct clock_event_device *evt)
-{
-}
-
/*
* Set up lowcore and control register of the current cpu to
* enable TOD clock and clock comparator interrupts.
@@ -145,7 +143,6 @@ void init_cpu_timer(void)
cd->rating = 400;
cd->cpumask = cpumask_of(cpu);
cd->set_next_event = s390_next_event;
- cd->set_mode = s390_set_mode;
clockevents_register_device(cd);
@@ -181,12 +178,12 @@ static void timing_alert_interrupt(struct ext_code ext_code,
static void etr_reset(void);
static void stp_reset(void);
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
{
tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts);
}
-void read_boot_clock(struct timespec *ts)
+void read_boot_clock64(struct timespec64 *ts)
{
tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts);
}
@@ -381,7 +378,7 @@ static void disable_sync_clock(void *dummy)
* increase the "sequence" counter to avoid the race of an
* etr event and the complete recovery against get_sync_clock.
*/
- atomic_clear_mask(0x80000000, sw_ptr);
+ atomic_andnot(0x80000000, sw_ptr);
atomic_inc(sw_ptr);
}
@@ -392,7 +389,7 @@ static void disable_sync_clock(void *dummy)
static void enable_sync_clock(void)
{
atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
- atomic_set_mask(0x80000000, sw_ptr);
+ atomic_or(0x80000000, sw_ptr);
}
/*
@@ -545,16 +542,17 @@ arch_initcall(etr_init);
* Switch to local machine check. This is called when the last usable
* ETR port goes inactive. After switch to local the clock is not in sync.
*/
-void etr_switch_to_local(void)
+int etr_switch_to_local(void)
{
if (!etr_eacr.sl)
- return;
+ return 0;
disable_sync_clock(NULL);
if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) {
etr_eacr.es = etr_eacr.sl = 0;
etr_setr(&etr_eacr);
- queue_work(time_sync_wq, &etr_work);
+ return 1;
}
+ return 0;
}
/*
@@ -563,16 +561,22 @@ void etr_switch_to_local(void)
* After a ETR sync check the clock is not in sync. The machine check
* is broadcasted to all cpus at the same time.
*/
-void etr_sync_check(void)
+int etr_sync_check(void)
{
if (!etr_eacr.es)
- return;
+ return 0;
disable_sync_clock(NULL);
if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) {
etr_eacr.es = 0;
etr_setr(&etr_eacr);
- queue_work(time_sync_wq, &etr_work);
+ return 1;
}
+ return 0;
+}
+
+void etr_queue_work(void)
+{
+ queue_work(time_sync_wq, &etr_work);
}
/*
@@ -752,7 +756,7 @@ static void clock_sync_cpu(struct clock_sync_data *sync)
static int etr_sync_clock(void *data)
{
static int first;
- unsigned long long clock, old_clock, delay, delta;
+ unsigned long long clock, old_clock, clock_delta, delay, delta;
struct clock_sync_data *etr_sync;
struct etr_aib *sync_port, *aib;
int port;
@@ -789,6 +793,9 @@ static int etr_sync_clock(void *data)
delay = (unsigned long long)
(aib->edf2.etv - sync_port->edf2.etv) << 32;
delta = adjust_time(old_clock, clock, delay);
+ clock_delta = clock - old_clock;
+ atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0,
+ &clock_delta);
etr_sync->fixup_cc = delta;
fixup_clock_comparator(delta);
/* Verify that the clock is properly set. */
@@ -1504,10 +1511,10 @@ static void stp_timing_alert(struct stp_irq_parm *intparm)
* After a STP sync check the clock is not in sync. The machine check
* is broadcasted to all cpus at the same time.
*/
-void stp_sync_check(void)
+int stp_sync_check(void)
{
disable_sync_clock(NULL);
- queue_work(time_sync_wq, &stp_work);
+ return 1;
}
/*
@@ -1516,17 +1523,21 @@ void stp_sync_check(void)
* have matching CTN ids and have a valid stratum-1 configuration
* but the configurations do not match.
*/
-void stp_island_check(void)
+int stp_island_check(void)
{
disable_sync_clock(NULL);
- queue_work(time_sync_wq, &stp_work);
+ return 1;
}
+void stp_queue_work(void)
+{
+ queue_work(time_sync_wq, &stp_work);
+}
static int stp_sync_clock(void *data)
{
static int first;
- unsigned long long old_clock, delta;
+ unsigned long long old_clock, delta, new_clock, clock_delta;
struct clock_sync_data *stp_sync;
int rc;
@@ -1551,7 +1562,11 @@ static int stp_sync_clock(void *data)
old_clock = get_tod_clock();
rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0);
if (rc == 0) {
- delta = adjust_time(old_clock, get_tod_clock(), 0);
+ new_clock = get_tod_clock();
+ delta = adjust_time(old_clock, new_clock, 0);
+ clock_delta = new_clock - old_clock;
+ atomic_notifier_call_chain(&s390_epoch_delta_notifier,
+ 0, &clock_delta);
fixup_clock_comparator(delta);
rc = chsc_sstpi(stp_page, &stp_info,
sizeof(struct stp_sstpi));
diff --git a/kernel/arch/s390/kernel/topology.c b/kernel/arch/s390/kernel/topology.c
index 5728c5bd4..40b8102fd 100644
--- a/kernel/arch/s390/kernel/topology.c
+++ b/kernel/arch/s390/kernel/topology.c
@@ -18,7 +18,10 @@
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
+#include <linux/nodemask.h>
+#include <linux/node.h>
#include <asm/sysinfo.h>
+#include <asm/numa.h>
#define PTF_HORIZONTAL (0UL)
#define PTF_VERTICAL (1UL)
@@ -37,8 +40,10 @@ static struct sysinfo_15_1_x *tl_info;
static int topology_enabled = 1;
static DECLARE_WORK(topology_work, topology_work_fn);
-/* topology_lock protects the socket and book linked lists */
-static DEFINE_SPINLOCK(topology_lock);
+/*
+ * Socket/Book linked lists and per_cpu(cpu_topology) updates are
+ * protected by "sched_domains_mutex".
+ */
static struct mask_info socket_info;
static struct mask_info book_info;
@@ -79,6 +84,7 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
struct mask_info *socket,
int one_socket_per_cpu)
{
+ struct cpu_topology_s390 *topo;
unsigned int core;
for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) {
@@ -90,15 +96,16 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
if (lcpu < 0)
continue;
for (i = 0; i <= smp_cpu_mtid; i++) {
- per_cpu(cpu_topology, lcpu + i).book_id = book->id;
- per_cpu(cpu_topology, lcpu + i).core_id = rcore;
- per_cpu(cpu_topology, lcpu + i).thread_id = lcpu + i;
+ topo = &per_cpu(cpu_topology, lcpu + i);
+ topo->book_id = book->id;
+ topo->core_id = rcore;
+ topo->thread_id = lcpu + i;
cpumask_set_cpu(lcpu + i, &book->mask);
cpumask_set_cpu(lcpu + i, &socket->mask);
if (one_socket_per_cpu)
- per_cpu(cpu_topology, lcpu + i).socket_id = rcore;
+ topo->socket_id = rcore;
else
- per_cpu(cpu_topology, lcpu + i).socket_id = socket->id;
+ topo->socket_id = socket->id;
smp_cpu_set_polarization(lcpu + i, tl_core->pp);
}
if (one_socket_per_cpu)
@@ -188,7 +195,6 @@ static void tl_to_masks(struct sysinfo_15_1_x *info)
{
struct cpuid cpu_id;
- spin_lock_irq(&topology_lock);
get_cpu_id(&cpu_id);
clear_masks();
switch (cpu_id.machine) {
@@ -199,7 +205,6 @@ static void tl_to_masks(struct sysinfo_15_1_x *info)
default:
__tl_to_masks_generic(info);
}
- spin_unlock_irq(&topology_lock);
}
static void topology_update_polarization_simple(void)
@@ -244,22 +249,22 @@ int topology_set_cpu_management(int fc)
static void update_cpu_masks(void)
{
- unsigned long flags;
+ struct cpu_topology_s390 *topo;
int cpu;
- spin_lock_irqsave(&topology_lock, flags);
for_each_possible_cpu(cpu) {
- per_cpu(cpu_topology, cpu).thread_mask = cpu_thread_map(cpu);
- per_cpu(cpu_topology, cpu).core_mask = cpu_group_map(&socket_info, cpu);
- per_cpu(cpu_topology, cpu).book_mask = cpu_group_map(&book_info, cpu);
+ topo = &per_cpu(cpu_topology, cpu);
+ topo->thread_mask = cpu_thread_map(cpu);
+ topo->core_mask = cpu_group_map(&socket_info, cpu);
+ topo->book_mask = cpu_group_map(&book_info, cpu);
if (!MACHINE_HAS_TOPOLOGY) {
- per_cpu(cpu_topology, cpu).thread_id = cpu;
- per_cpu(cpu_topology, cpu).core_id = cpu;
- per_cpu(cpu_topology, cpu).socket_id = cpu;
- per_cpu(cpu_topology, cpu).book_id = cpu;
+ topo->thread_id = cpu;
+ topo->core_id = cpu;
+ topo->socket_id = cpu;
+ topo->book_id = cpu;
}
}
- spin_unlock_irqrestore(&topology_lock, flags);
+ numa_update_cpu_topology();
}
void store_topology(struct sysinfo_15_1_x *info)
@@ -274,21 +279,21 @@ int arch_update_cpu_topology(void)
{
struct sysinfo_15_1_x *info = tl_info;
struct device *dev;
- int cpu;
+ int cpu, rc = 0;
- if (!MACHINE_HAS_TOPOLOGY) {
- update_cpu_masks();
- topology_update_polarization_simple();
- return 0;
+ if (MACHINE_HAS_TOPOLOGY) {
+ rc = 1;
+ store_topology(info);
+ tl_to_masks(info);
}
- store_topology(info);
- tl_to_masks(info);
update_cpu_masks();
+ if (!MACHINE_HAS_TOPOLOGY)
+ topology_update_polarization_simple();
for_each_online_cpu(cpu) {
dev = get_cpu_device(cpu);
kobject_uevent(&dev->kobj, KOBJ_CHANGE);
}
- return 1;
+ return rc;
}
static void topology_work_fn(struct work_struct *work)
diff --git a/kernel/arch/s390/kernel/trace.c b/kernel/arch/s390/kernel/trace.c
new file mode 100644
index 000000000..21a5df995
--- /dev/null
+++ b/kernel/arch/s390/kernel/trace.c
@@ -0,0 +1,29 @@
+/*
+ * Tracepoint definitions for s390
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/percpu.h>
+#define CREATE_TRACE_POINTS
+#include <asm/trace/diag.h>
+
+EXPORT_TRACEPOINT_SYMBOL(s390_diagnose);
+
+static DEFINE_PER_CPU(unsigned int, diagnose_trace_depth);
+
+void trace_s390_diagnose_norecursion(int diag_nr)
+{
+ unsigned long flags;
+ unsigned int *depth;
+
+ local_irq_save(flags);
+ depth = this_cpu_ptr(&diagnose_trace_depth);
+ if (*depth == 0) {
+ (*depth)++;
+ trace_s390_diagnose(diag_nr);
+ (*depth)--;
+ }
+ local_irq_restore(flags);
+}
diff --git a/kernel/arch/s390/kernel/traps.c b/kernel/arch/s390/kernel/traps.c
index 4d96c9f53..1b18118bb 100644
--- a/kernel/arch/s390/kernel/traps.c
+++ b/kernel/arch/s390/kernel/traps.c
@@ -19,7 +19,7 @@
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/slab.h>
-#include <asm/switch_to.h>
+#include <asm/fpu/api.h>
#include "entry.h"
int show_unhandled_signals = 1;
@@ -151,7 +151,7 @@ DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
"transaction constraint exception")
-static inline void do_fp_trap(struct pt_regs *regs, int fpc)
+static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc)
{
int si_code = 0;
/* FPC[2] is Data Exception Code */
@@ -224,31 +224,6 @@ NOKPROBE_SYMBOL(illegal_op);
DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
"specification exception");
-int alloc_vector_registers(struct task_struct *tsk)
-{
- __vector128 *vxrs;
- int i;
-
- /* Allocate vector register save area. */
- vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS,
- GFP_KERNEL|__GFP_REPEAT);
- if (!vxrs)
- return -ENOMEM;
- preempt_disable();
- if (tsk == current)
- save_fp_regs(tsk->thread.fp_regs.fprs);
- /* Copy the 16 floating point registers */
- for (i = 0; i < 16; i++)
- *(freg_t *) &vxrs[i] = tsk->thread.fp_regs.fprs[i];
- tsk->thread.vxrs = vxrs;
- if (tsk == current) {
- __ctl_set_bit(0, 17);
- restore_vx_regs(vxrs);
- }
- preempt_enable();
- return 0;
-}
-
void vector_exception(struct pt_regs *regs)
{
int si_code, vic;
@@ -259,8 +234,8 @@ void vector_exception(struct pt_regs *regs)
}
/* get vector interrupt code from fpc */
- asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
- vic = (current->thread.fp_regs.fpc & 0xf00) >> 8;
+ save_fpu_regs();
+ vic = (current->thread.fpu.fpc & 0xf00) >> 8;
switch (vic) {
case 1: /* invalid vector operation */
si_code = FPE_FLTINV;
@@ -283,13 +258,6 @@ void vector_exception(struct pt_regs *regs)
do_trap(regs, SIGFPE, si_code, "vector exception");
}
-static int __init disable_vector_extension(char *str)
-{
- S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
- return 1;
-}
-__setup("novx", disable_vector_extension);
-
void data_exception(struct pt_regs *regs)
{
__u16 __user *location;
@@ -297,22 +265,13 @@ void data_exception(struct pt_regs *regs)
location = get_trap_ip(regs);
- asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
- /* Check for vector register enablement */
- if (MACHINE_HAS_VX && !current->thread.vxrs &&
- (current->thread.fp_regs.fpc & FPC_DXC_MASK) == 0xfe00) {
- alloc_vector_registers(current);
- /* Vector data exception is suppressing, rewind psw. */
- regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
- clear_pt_regs_flag(regs, PIF_PER_TRAP);
- return;
- }
- if (current->thread.fp_regs.fpc & FPC_DXC_MASK)
+ save_fpu_regs();
+ if (current->thread.fpu.fpc & FPC_DXC_MASK)
signal = SIGFPE;
else
signal = SIGILL;
if (signal == SIGFPE)
- do_fp_trap(regs, current->thread.fp_regs.fpc);
+ do_fp_trap(regs, current->thread.fpu.fpc);
else if (signal)
do_trap(regs, signal, ILL_ILLOPN, "data exception");
}
diff --git a/kernel/arch/s390/kernel/vdso.c b/kernel/arch/s390/kernel/vdso.c
index 0d58269ff..59eddb0e1 100644
--- a/kernel/arch/s390/kernel/vdso.c
+++ b/kernel/arch/s390/kernel/vdso.c
@@ -299,7 +299,7 @@ static int __init vdso_init(void)
get_page(virt_to_page(vdso_data));
- smp_wmb();
+ smp_mb();
return 0;
}
diff --git a/kernel/arch/s390/kernel/vdso32/Makefile b/kernel/arch/s390/kernel/vdso32/Makefile
index 8ad2b34ad..ee8a18e50 100644
--- a/kernel/arch/s390/kernel/vdso32/Makefile
+++ b/kernel/arch/s390/kernel/vdso32/Makefile
@@ -13,7 +13,7 @@ KBUILD_AFLAGS_31 += -m31 -s
KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+ $(call cc-ldoption, -Wl$(comma)--hash-style=both)
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
diff --git a/kernel/arch/s390/kernel/vdso64/Makefile b/kernel/arch/s390/kernel/vdso64/Makefile
index 2a8ddfd12..c4b03f9ed 100644
--- a/kernel/arch/s390/kernel/vdso64/Makefile
+++ b/kernel/arch/s390/kernel/vdso64/Makefile
@@ -13,7 +13,7 @@ KBUILD_AFLAGS_64 += -m64 -s
KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+ $(call cc-ldoption, -Wl$(comma)--hash-style=both)
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
diff --git a/kernel/arch/s390/kernel/vtime.c b/kernel/arch/s390/kernel/vtime.c
index e53d3595a..dafc44f51 100644
--- a/kernel/arch/s390/kernel/vtime.c
+++ b/kernel/arch/s390/kernel/vtime.c
@@ -25,9 +25,10 @@ static DEFINE_SPINLOCK(virt_timer_lock);
static atomic64_t virt_timer_current;
static atomic64_t virt_timer_elapsed;
-static DEFINE_PER_CPU(u64, mt_cycles[32]);
+DEFINE_PER_CPU(u64, mt_cycles[8]);
static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
+static DEFINE_PER_CPU(u64, mt_scaling_jiffies);
static inline u64 get_vtimer(void)
{
@@ -59,6 +60,34 @@ static inline int virt_timer_forward(u64 elapsed)
return elapsed >= atomic64_read(&virt_timer_current);
}
+static void update_mt_scaling(void)
+{
+ u64 cycles_new[8], *cycles_old;
+ u64 delta, fac, mult, div;
+ int i;
+
+ stcctm5(smp_cpu_mtid + 1, cycles_new);
+ cycles_old = this_cpu_ptr(mt_cycles);
+ fac = 1;
+ mult = div = 0;
+ for (i = 0; i <= smp_cpu_mtid; i++) {
+ delta = cycles_new[i] - cycles_old[i];
+ div += delta;
+ mult *= i + 1;
+ mult += delta * fac;
+ fac *= i + 1;
+ }
+ div *= fac;
+ if (div > 0) {
+ /* Update scaling factor */
+ __this_cpu_write(mt_scaling_mult, mult);
+ __this_cpu_write(mt_scaling_div, div);
+ memcpy(cycles_old, cycles_new,
+ sizeof(u64) * (smp_cpu_mtid + 1));
+ }
+ __this_cpu_write(mt_scaling_jiffies, jiffies_64);
+}
+
/*
* Update process times based on virtual cpu times stored by entry.S
* to the lowcore fields user_timer, system_timer & steal_clock.
@@ -68,7 +97,6 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
struct thread_info *ti = task_thread_info(tsk);
u64 timer, clock, user, system, steal;
u64 user_scaled, system_scaled;
- int i;
timer = S390_lowcore.last_update_timer;
clock = S390_lowcore.last_update_clock;
@@ -84,28 +112,10 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
- /* Do MT utilization calculation */
- if (smp_cpu_mtid) {
- u64 cycles_new[32], *cycles_old;
- u64 delta, mult, div;
-
- cycles_old = this_cpu_ptr(mt_cycles);
- if (stcctm5(smp_cpu_mtid + 1, cycles_new) < 2) {
- mult = div = 0;
- for (i = 0; i <= smp_cpu_mtid; i++) {
- delta = cycles_new[i] - cycles_old[i];
- mult += delta;
- div += (i + 1) * delta;
- }
- if (mult > 0) {
- /* Update scaling factor */
- __this_cpu_write(mt_scaling_mult, mult);
- __this_cpu_write(mt_scaling_div, div);
- memcpy(cycles_old, cycles_new,
- sizeof(u64) * (smp_cpu_mtid + 1));
- }
- }
- }
+ /* Update MT utilization calculation */
+ if (smp_cpu_mtid &&
+ time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
+ update_mt_scaling();
user = S390_lowcore.user_timer - ti->user_timer;
S390_lowcore.steal_timer -= user;
@@ -174,6 +184,11 @@ void vtime_account_irq_enter(struct task_struct *tsk)
S390_lowcore.last_update_timer = get_vtimer();
S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
+ /* Update MT utilization calculation */
+ if (smp_cpu_mtid &&
+ time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
+ update_mt_scaling();
+
system = S390_lowcore.system_timer - ti->system_timer;
S390_lowcore.steal_timer -= system;
ti->system_timer = S390_lowcore.system_timer;
@@ -376,4 +391,11 @@ void vtime_init(void)
{
/* set initial cpu timer */
set_vtimer(VTIMER_MAX_SLICE);
+ /* Setup initial MT scaling values */
+ if (smp_cpu_mtid) {
+ __this_cpu_write(mt_scaling_jiffies, jiffies);
+ __this_cpu_write(mt_scaling_mult, 1);
+ __this_cpu_write(mt_scaling_div, 1);
+ stcctm5(smp_cpu_mtid + 1, this_cpu_ptr(mt_cycles));
+ }
}
diff --git a/kernel/arch/s390/kvm/diag.c b/kernel/arch/s390/kvm/diag.c
index fc7ec9584..5fbfb88f8 100644
--- a/kernel/arch/s390/kvm/diag.c
+++ b/kernel/arch/s390/kvm/diag.c
@@ -27,13 +27,13 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
+ vcpu->stat.diagnose_10++;
if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
|| start < 2 * PAGE_SIZE)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
- vcpu->stat.diagnose_10++;
/*
* We checked for start >= end above, so lets check for the
@@ -75,6 +75,9 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
+ VCPU_EVENT(vcpu, 3, "diag page reference parameter block at 0x%llx",
+ vcpu->run->s.regs.gprs[rx]);
+ vcpu->stat.diagnose_258++;
if (vcpu->run->s.regs.gprs[rx] & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
@@ -85,6 +88,9 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
switch (parm.subcode) {
case 0: /* TOKEN */
+ VCPU_EVENT(vcpu, 3, "pageref token addr 0x%llx "
+ "select mask 0x%llx compare mask 0x%llx",
+ parm.token_addr, parm.select_mask, parm.compare_mask);
if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
/*
* If the pagefault handshake is already activated,
@@ -114,6 +120,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
* the cancel, therefore to reduce code complexity, we assume
* all outstanding tokens are already pending.
*/
+ VCPU_EVENT(vcpu, 3, "pageref cancel addr 0x%llx", parm.token_addr);
if (parm.token_addr || parm.select_mask ||
parm.compare_mask || parm.zarch)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -174,7 +181,8 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
unsigned int reg = vcpu->arch.sie_block->ipa & 0xf;
unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff;
- VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
+ VCPU_EVENT(vcpu, 3, "diag ipl functions, subcode %lx", subcode);
+ vcpu->stat.diagnose_308++;
switch (subcode) {
case 3:
vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
@@ -202,6 +210,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
{
int ret;
+ vcpu->stat.diagnose_500++;
/* No virtio-ccw notification? Get out quickly. */
if (!vcpu->kvm->arch.css_support ||
(vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
diff --git a/kernel/arch/s390/kvm/guestdbg.c b/kernel/arch/s390/kvm/guestdbg.c
index e97b3455d..47518a324 100644
--- a/kernel/arch/s390/kvm/guestdbg.c
+++ b/kernel/arch/s390/kvm/guestdbg.c
@@ -473,10 +473,45 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->iprcc &= ~PGM_PER;
}
+#define pssec(vcpu) (vcpu->arch.sie_block->gcr[1] & _ASCE_SPACE_SWITCH)
+#define hssec(vcpu) (vcpu->arch.sie_block->gcr[13] & _ASCE_SPACE_SWITCH)
+#define old_ssec(vcpu) ((vcpu->arch.sie_block->tecmc >> 31) & 0x1)
+#define old_as_is_home(vcpu) !(vcpu->arch.sie_block->tecmc & 0xffff)
+
void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
{
+ int new_as;
+
if (debug_exit_required(vcpu))
vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
filter_guest_per_event(vcpu);
+
+ /*
+ * Only RP, SAC, SACF, PT, PTI, PR, PC instructions can trigger
+ * a space-switch event. PER events enforce space-switch events
+ * for these instructions. So if no PER event for the guest is left,
+ * we might have to filter the space-switch element out, too.
+ */
+ if (vcpu->arch.sie_block->iprcc == PGM_SPACE_SWITCH) {
+ vcpu->arch.sie_block->iprcc = 0;
+ new_as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+
+ /*
+ * If the AS changed from / to home, we had RP, SAC or SACF
+ * instruction. Check primary and home space-switch-event
+ * controls. (theoretically home -> home produced no event)
+ */
+ if (((new_as == PSW_AS_HOME) ^ old_as_is_home(vcpu)) &&
+ (pssec(vcpu) || hssec(vcpu)))
+ vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
+
+ /*
+ * PT, PTI, PR, PC instruction operate on primary AS only. Check
+ * if the primary-space-switch-event control was or got set.
+ */
+ if (new_as == PSW_AS_PRIMARY && !old_as_is_home(vcpu) &&
+ (pssec(vcpu) || old_ssec(vcpu)))
+ vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
+ }
}
diff --git a/kernel/arch/s390/kvm/intercept.c b/kernel/arch/s390/kvm/intercept.c
index 9e3779e3e..b4a5aa110 100644
--- a/kernel/arch/s390/kvm/intercept.c
+++ b/kernel/arch/s390/kvm/intercept.c
@@ -241,21 +241,6 @@ static int handle_prog(struct kvm_vcpu *vcpu)
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
-static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
-{
- int rc, rc2;
-
- vcpu->stat.exit_instr_and_program++;
- rc = handle_instruction(vcpu);
- rc2 = handle_prog(vcpu);
-
- if (rc == -EOPNOTSUPP)
- vcpu->arch.sie_block->icptcode = 0x04;
- if (rc)
- return rc;
- return rc2;
-}
-
/**
* handle_external_interrupt - used for external interruption interceptions
*
@@ -351,29 +336,28 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
-static const intercept_handler_t intercept_funcs[] = {
- [0x00 >> 2] = handle_noop,
- [0x04 >> 2] = handle_instruction,
- [0x08 >> 2] = handle_prog,
- [0x0C >> 2] = handle_instruction_and_prog,
- [0x10 >> 2] = handle_noop,
- [0x14 >> 2] = handle_external_interrupt,
- [0x18 >> 2] = handle_noop,
- [0x1C >> 2] = kvm_s390_handle_wait,
- [0x20 >> 2] = handle_validity,
- [0x28 >> 2] = handle_stop,
- [0x38 >> 2] = handle_partial_execution,
-};
-
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
{
- intercept_handler_t func;
- u8 code = vcpu->arch.sie_block->icptcode;
-
- if (code & 3 || (code >> 2) >= ARRAY_SIZE(intercept_funcs))
+ switch (vcpu->arch.sie_block->icptcode) {
+ case 0x00:
+ case 0x10:
+ case 0x18:
+ return handle_noop(vcpu);
+ case 0x04:
+ return handle_instruction(vcpu);
+ case 0x08:
+ return handle_prog(vcpu);
+ case 0x14:
+ return handle_external_interrupt(vcpu);
+ case 0x1c:
+ return kvm_s390_handle_wait(vcpu);
+ case 0x20:
+ return handle_validity(vcpu);
+ case 0x28:
+ return handle_stop(vcpu);
+ case 0x38:
+ return handle_partial_execution(vcpu);
+ default:
return -EOPNOTSUPP;
- func = intercept_funcs[code >> 2];
- if (func)
- return func(vcpu);
- return -EOPNOTSUPP;
+ }
}
diff --git a/kernel/arch/s390/kvm/interrupt.c b/kernel/arch/s390/kvm/interrupt.c
index 140a1131a..cc862c486 100644
--- a/kernel/arch/s390/kvm/interrupt.c
+++ b/kernel/arch/s390/kvm/interrupt.c
@@ -30,7 +30,6 @@
#define IOINT_SCHID_MASK 0x0000ffff
#define IOINT_SSID_MASK 0x00030000
#define IOINT_CSSID_MASK 0x03fc0000
-#define IOINT_AI_MASK 0x04000000
#define PFAULT_INIT 0x0600
#define PFAULT_DONE 0x0680
#define VIRTIO_PARAM 0x0d00
@@ -52,11 +51,9 @@ static int psw_mchk_disabled(struct kvm_vcpu *vcpu)
static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
{
- if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) ||
- (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) ||
- (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT))
- return 0;
- return 1;
+ return psw_extint_disabled(vcpu) &&
+ psw_ioint_disabled(vcpu) &&
+ psw_mchk_disabled(vcpu);
}
static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
@@ -72,8 +69,7 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
static int ckc_irq_pending(struct kvm_vcpu *vcpu)
{
- if (!(vcpu->arch.sie_block->ckc <
- get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
+ if (vcpu->arch.sie_block->ckc >= kvm_s390_get_tod_clock_fast(vcpu->kvm))
return 0;
return ckc_interrupts_enabled(vcpu);
}
@@ -106,14 +102,10 @@ static inline u8 int_word_to_isc(u32 int_word)
return (int_word & 0x38000000) >> 27;
}
-static inline unsigned long pending_floating_irqs(struct kvm_vcpu *vcpu)
-{
- return vcpu->kvm->arch.float_int.pending_irqs;
-}
-
-static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
+static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.local_int.pending_irqs;
+ return vcpu->kvm->arch.float_int.pending_irqs |
+ vcpu->arch.local_int.pending_irqs;
}
static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
@@ -132,8 +124,9 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
{
unsigned long active_mask;
- active_mask = pending_local_irqs(vcpu);
- active_mask |= pending_floating_irqs(vcpu);
+ active_mask = pending_irqs(vcpu);
+ if (!active_mask)
+ return 0;
if (psw_extint_disabled(vcpu))
active_mask &= ~IRQ_PEND_EXT_MASK;
@@ -168,20 +161,20 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
static void __set_cpu_idle(struct kvm_vcpu *vcpu)
{
- atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+ atomic_or(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
}
static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
{
- atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+ atomic_andnot(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
}
static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
{
- atomic_clear_mask(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
- &vcpu->arch.sie_block->cpuflags);
+ atomic_andnot(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
+ &vcpu->arch.sie_block->cpuflags);
vcpu->arch.sie_block->lctl = 0x0000;
vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT);
@@ -194,12 +187,12 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
{
- atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
+ atomic_or(flag, &vcpu->arch.sie_block->cpuflags);
}
static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
{
- if (!(pending_floating_irqs(vcpu) & IRQ_PEND_IO_MASK))
+ if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK))
return;
else if (psw_ioint_disabled(vcpu))
__set_cpuflag(vcpu, CPUSTAT_IO_INT);
@@ -209,7 +202,7 @@ static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
{
- if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK))
+ if (!(pending_irqs(vcpu) & IRQ_PEND_EXT_MASK))
return;
if (psw_extint_disabled(vcpu))
__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
@@ -219,7 +212,7 @@ static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
{
- if (!(pending_local_irqs(vcpu) & IRQ_PEND_MCHK_MASK))
+ if (!(pending_irqs(vcpu) & IRQ_PEND_MCHK_MASK))
return;
if (psw_mchk_disabled(vcpu))
vcpu->arch.sie_block->ictl |= ICTL_LPSW;
@@ -309,8 +302,8 @@ static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
li->irq.ext.ext_params2 = 0;
spin_unlock(&li->lock);
- VCPU_EVENT(vcpu, 4, "interrupt: pfault init parm:%x,parm64:%llx",
- 0, ext.ext_params2);
+ VCPU_EVENT(vcpu, 4, "deliver: pfault init token 0x%llx",
+ ext.ext_params2);
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
KVM_S390_INT_PFAULT_INIT,
0, ext.ext_params2);
@@ -366,7 +359,7 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
spin_unlock(&fi->lock);
if (deliver) {
- VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+ VCPU_EVENT(vcpu, 3, "deliver: machine check mcic 0x%llx",
mchk.mcic);
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
KVM_S390_MCHK,
@@ -401,7 +394,7 @@ static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
int rc;
- VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
+ VCPU_EVENT(vcpu, 3, "%s", "deliver: cpu restart");
vcpu->stat.deliver_restart_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
@@ -425,7 +418,6 @@ static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu)
clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
spin_unlock(&li->lock);
- VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", prefix.address);
vcpu->stat.deliver_prefix_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
KVM_S390_SIGP_SET_PREFIX,
@@ -448,7 +440,7 @@ static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu)
clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
spin_unlock(&li->lock);
- VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
+ VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp emerg");
vcpu->stat.deliver_emergency_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
cpu_addr, 0);
@@ -475,7 +467,7 @@ static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
spin_unlock(&li->lock);
- VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
+ VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp ext call");
vcpu->stat.deliver_external_call++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
KVM_S390_INT_EXTERNAL_CALL,
@@ -504,7 +496,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
memset(&li->irq.pgm, 0, sizeof(pgm_info));
spin_unlock(&li->lock);
- VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
+ VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilc:%d",
pgm_info.code, ilc);
vcpu->stat.deliver_program_int++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
@@ -620,7 +612,7 @@ static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
spin_unlock(&fi->lock);
- VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
+ VCPU_EVENT(vcpu, 4, "deliver: sclp parameter 0x%x",
ext.ext_params);
vcpu->stat.deliver_service_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
@@ -649,9 +641,6 @@ static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
struct kvm_s390_interrupt_info,
list);
if (inti) {
- trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
- KVM_S390_INT_PFAULT_DONE, 0,
- inti->ext.ext_params2);
list_del(&inti->list);
fi->counters[FIRQ_CNTR_PFAULT] -= 1;
}
@@ -660,6 +649,12 @@ static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
spin_unlock(&fi->lock);
if (inti) {
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+ KVM_S390_INT_PFAULT_DONE, 0,
+ inti->ext.ext_params2);
+ VCPU_EVENT(vcpu, 4, "deliver: pfault done token 0x%llx",
+ inti->ext.ext_params2);
+
rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
(u16 *)__LC_EXT_INT_CODE);
rc |= put_guest_lc(vcpu, PFAULT_DONE,
@@ -689,7 +684,7 @@ static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu)
list);
if (inti) {
VCPU_EVENT(vcpu, 4,
- "interrupt: virtio parm:%x,parm64:%llx",
+ "deliver: virtio parm: 0x%x,parm64: 0x%llx",
inti->ext.ext_params, inti->ext.ext_params2);
vcpu->stat.deliver_virtio_interrupt++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
@@ -739,7 +734,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt_info,
list);
if (inti) {
- VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
+ VCPU_EVENT(vcpu, 4, "deliver: I/O 0x%llx", inti->type);
vcpu->stat.deliver_io_int++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
inti->type,
@@ -799,7 +794,7 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
- if (!sclp_has_sigpif())
+ if (!sclp.has_sigpif)
return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
return (sigp_ctrl & SIGP_CTRL_C) &&
@@ -808,23 +803,21 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
{
- int rc;
+ if (deliverable_irqs(vcpu))
+ return 1;
- rc = !!deliverable_irqs(vcpu);
-
- if (!rc && kvm_cpu_has_pending_timer(vcpu))
- rc = 1;
+ if (kvm_cpu_has_pending_timer(vcpu))
+ return 1;
/* external call pending and deliverable */
- if (!rc && kvm_s390_ext_call_pending(vcpu) &&
+ if (kvm_s390_ext_call_pending(vcpu) &&
!psw_extint_disabled(vcpu) &&
(vcpu->arch.sie_block->gcr[0] & 0x2000ul))
- rc = 1;
-
- if (!rc && !exclude_stop && kvm_s390_is_stop_irq_pending(vcpu))
- rc = 1;
+ return 1;
- return rc;
+ if (!exclude_stop && kvm_s390_is_stop_irq_pending(vcpu))
+ return 1;
+ return 0;
}
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
@@ -839,7 +832,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
vcpu->stat.exit_wait_state++;
/* fast path */
- if (kvm_cpu_has_pending_timer(vcpu) || kvm_arch_vcpu_runnable(vcpu))
+ if (kvm_arch_vcpu_runnable(vcpu))
return 0;
if (psw_interrupts_disabled(vcpu)) {
@@ -853,7 +846,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
goto no_timer;
}
- now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
+ now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
/* underflow */
@@ -862,7 +855,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
__set_cpu_idle(vcpu);
hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
- VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
+ VCPU_EVENT(vcpu, 4, "enabled wait via clock comparator: %llu ns", sltime);
no_timer:
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
kvm_vcpu_block(vcpu);
@@ -875,13 +868,13 @@ no_timer:
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
{
- if (swaitqueue_active(&vcpu->wq)) {
+ if (swait_active(&vcpu->wq)) {
/*
* The vcpu gave up the cpu voluntarily, mark it as a good
* yield-candidate.
*/
vcpu->preempted = true;
- swait_wake_interruptible(&vcpu->wq);
+ swake_up(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}
}
@@ -892,7 +885,7 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
u64 now, sltime;
vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
- now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
+ now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
/*
@@ -917,7 +910,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
spin_unlock(&li->lock);
/* clear pending external calls set by sigp interpretation facility */
- atomic_clear_mask(CPUSTAT_ECALL_PEND, li->cpuflags);
+ atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags);
vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0;
}
@@ -941,12 +934,9 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
if (cpu_timer_irq_pending(vcpu))
set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
- do {
- irqs = deliverable_irqs(vcpu);
+ while ((irqs = deliverable_irqs(vcpu)) && !rc) {
/* bits are in the order of interrupt priority */
irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT);
- if (irq_type == IRQ_PEND_COUNT)
- break;
if (is_ioirq(irq_type)) {
rc = __deliver_io(vcpu, irq_type);
} else {
@@ -958,9 +948,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
}
rc = func(vcpu);
}
- if (rc)
- break;
- } while (!rc);
+ }
set_intercept_indicators(vcpu);
@@ -971,59 +959,47 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- li->irq.pgm = irq->u.pgm;
+ VCPU_EVENT(vcpu, 3, "inject: program irq code 0x%x", irq->u.pgm.code);
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+ irq->u.pgm.code, 0);
+
+ if (irq->u.pgm.code == PGM_PER) {
+ li->irq.pgm.code |= PGM_PER;
+ /* only modify PER related information */
+ li->irq.pgm.per_address = irq->u.pgm.per_address;
+ li->irq.pgm.per_code = irq->u.pgm.per_code;
+ li->irq.pgm.per_atmid = irq->u.pgm.per_atmid;
+ li->irq.pgm.per_access_id = irq->u.pgm.per_access_id;
+ } else if (!(irq->u.pgm.code & PGM_PER)) {
+ li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) |
+ irq->u.pgm.code;
+ /* only modify non-PER information */
+ li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code;
+ li->irq.pgm.mon_code = irq->u.pgm.mon_code;
+ li->irq.pgm.data_exc_code = irq->u.pgm.data_exc_code;
+ li->irq.pgm.mon_class_nr = irq->u.pgm.mon_class_nr;
+ li->irq.pgm.exc_access_id = irq->u.pgm.exc_access_id;
+ li->irq.pgm.op_access_id = irq->u.pgm.op_access_id;
+ } else {
+ li->irq.pgm = irq->u.pgm;
+ }
set_bit(IRQ_PEND_PROG, &li->pending_irqs);
return 0;
}
-int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
-{
- struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- struct kvm_s390_irq irq;
-
- VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
- trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, code,
- 0, 1);
- spin_lock(&li->lock);
- irq.u.pgm.code = code;
- __inject_prog(vcpu, &irq);
- BUG_ON(swaitqueue_active(li->wq));
- spin_unlock(&li->lock);
- return 0;
-}
-
-int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
- struct kvm_s390_pgm_info *pgm_info)
-{
- struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- struct kvm_s390_irq irq;
- int rc;
-
- VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
- pgm_info->code);
- trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
- pgm_info->code, 0, 1);
- spin_lock(&li->lock);
- irq.u.pgm = *pgm_info;
- rc = __inject_prog(vcpu, &irq);
- BUG_ON(swaitqueue_active(li->wq));
- spin_unlock(&li->lock);
- return rc;
-}
-
static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- VCPU_EVENT(vcpu, 3, "inject: external irq params:%x, params2:%llx",
- irq->u.ext.ext_params, irq->u.ext.ext_params2);
+ VCPU_EVENT(vcpu, 4, "inject: pfault init parameter block at 0x%llx",
+ irq->u.ext.ext_params2);
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT,
irq->u.ext.ext_params,
- irq->u.ext.ext_params2, 2);
+ irq->u.ext.ext_params2);
li->irq.ext = irq->u.ext;
set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
- atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+ atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
return 0;
}
@@ -1038,7 +1014,7 @@ static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id)
/* another external call is pending */
return -EBUSY;
}
- atomic_set_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+ atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
return 0;
}
@@ -1048,23 +1024,22 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
struct kvm_s390_extcall_info *extcall = &li->irq.extcall;
uint16_t src_id = irq->u.extcall.code;
- VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
+ VCPU_EVENT(vcpu, 4, "inject: external call source-cpu:%u",
src_id);
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
- src_id, 0, 2);
+ src_id, 0);
/* sending vcpu invalid */
- if (src_id >= KVM_MAX_VCPUS ||
- kvm_get_vcpu(vcpu->kvm, src_id) == NULL)
+ if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL)
return -EINVAL;
- if (sclp_has_sigpif())
+ if (sclp.has_sigpif)
return __inject_extcall_sigpif(vcpu, src_id);
if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
return -EBUSY;
*extcall = irq->u.extcall;
- atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+ atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
return 0;
}
@@ -1073,10 +1048,10 @@ static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_prefix_info *prefix = &li->irq.prefix;
- VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
+ VCPU_EVENT(vcpu, 3, "inject: set prefix to %x",
irq->u.prefix.address);
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
- irq->u.prefix.address, 0, 2);
+ irq->u.prefix.address, 0);
if (!is_vcpu_stopped(vcpu))
return -EBUSY;
@@ -1093,7 +1068,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
struct kvm_s390_stop_info *stop = &li->irq.stop;
int rc = 0;
- trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2);
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0);
if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS)
return -EINVAL;
@@ -1117,8 +1092,8 @@ static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- VCPU_EVENT(vcpu, 3, "inject: restart type %llx", irq->type);
- trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0, 2);
+ VCPU_EVENT(vcpu, 3, "%s", "inject: restart int");
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
set_bit(IRQ_PEND_RESTART, &li->pending_irqs);
return 0;
@@ -1129,14 +1104,18 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- VCPU_EVENT(vcpu, 3, "inject: emergency %u\n",
+ VCPU_EVENT(vcpu, 4, "inject: emergency from cpu %u",
irq->u.emerg.code);
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
- irq->u.emerg.code, 0, 2);
+ irq->u.emerg.code, 0);
+
+ /* sending vcpu invalid */
+ if (kvm_get_vcpu_by_id(vcpu->kvm, irq->u.emerg.code) == NULL)
+ return -EINVAL;
set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
- atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+ atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
return 0;
}
@@ -1145,10 +1124,10 @@ static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_mchk_info *mchk = &li->irq.mchk;
- VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
+ VCPU_EVENT(vcpu, 3, "inject: machine check mcic 0x%llx",
irq->u.mchk.mcic);
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
- irq->u.mchk.mcic, 2);
+ irq->u.mchk.mcic);
/*
* Because repressible machine checks can be indicated along with
@@ -1175,12 +1154,12 @@ static int __inject_ckc(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP);
+ VCPU_EVENT(vcpu, 3, "%s", "inject: clock comparator external");
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
- 0, 0, 2);
+ 0, 0);
set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
- atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+ atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
return 0;
}
@@ -1188,12 +1167,12 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER);
+ VCPU_EVENT(vcpu, 3, "%s", "inject: cpu timer external");
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
- 0, 0, 2);
+ 0, 0);
set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
- atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+ atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
return 0;
}
@@ -1340,17 +1319,56 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
return 0;
}
-static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+/*
+ * Find a destination VCPU for a floating irq and kick it.
+ */
+static void __floating_irq_kick(struct kvm *kvm, u64 type)
{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
struct kvm_s390_local_interrupt *li;
- struct kvm_s390_float_interrupt *fi;
- struct kvm_vcpu *dst_vcpu = NULL;
- int sigcpu;
+ struct kvm_vcpu *dst_vcpu;
+ int sigcpu, online_vcpus, nr_tries = 0;
+
+ online_vcpus = atomic_read(&kvm->online_vcpus);
+ if (!online_vcpus)
+ return;
+
+ /* find idle VCPUs first, then round robin */
+ sigcpu = find_first_bit(fi->idle_mask, online_vcpus);
+ if (sigcpu == online_vcpus) {
+ do {
+ sigcpu = fi->next_rr_cpu;
+ fi->next_rr_cpu = (fi->next_rr_cpu + 1) % online_vcpus;
+ /* avoid endless loops if all vcpus are stopped */
+ if (nr_tries++ >= online_vcpus)
+ return;
+ } while (is_vcpu_stopped(kvm_get_vcpu(kvm, sigcpu)));
+ }
+ dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
+
+ /* make the VCPU drop out of the SIE, or wake it up if sleeping */
+ li = &dst_vcpu->arch.local_int;
+ spin_lock(&li->lock);
+ switch (type) {
+ case KVM_S390_MCHK:
+ atomic_or(CPUSTAT_STOP_INT, li->cpuflags);
+ break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ atomic_or(CPUSTAT_IO_INT, li->cpuflags);
+ break;
+ default:
+ atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ break;
+ }
+ spin_unlock(&li->lock);
+ kvm_s390_vcpu_wakeup(dst_vcpu);
+}
+
+static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+{
u64 type = READ_ONCE(inti->type);
int rc;
- fi = &kvm->arch.float_int;
-
switch (type) {
case KVM_S390_MCHK:
rc = __inject_float_mchk(kvm, inti);
@@ -1373,32 +1391,8 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
if (rc)
return rc;
- sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
- if (sigcpu == KVM_MAX_VCPUS) {
- do {
- sigcpu = fi->next_rr_cpu++;
- if (sigcpu == KVM_MAX_VCPUS)
- sigcpu = fi->next_rr_cpu = 0;
- } while (kvm_get_vcpu(kvm, sigcpu) == NULL);
- }
- dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
- li = &dst_vcpu->arch.local_int;
- spin_lock(&li->lock);
- switch (type) {
- case KVM_S390_MCHK:
- atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
- break;
- case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
- atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
- break;
- default:
- atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- break;
- }
- spin_unlock(&li->lock);
- kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
+ __floating_irq_kick(kvm, type);
return 0;
-
}
int kvm_s390_inject_vm(struct kvm *kvm,
@@ -1420,20 +1414,20 @@ int kvm_s390_inject_vm(struct kvm *kvm,
inti->ext.ext_params2 = s390int->parm64;
break;
case KVM_S390_INT_SERVICE:
- VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
+ VM_EVENT(kvm, 4, "inject: sclp parm:%x", s390int->parm);
inti->ext.ext_params = s390int->parm;
break;
case KVM_S390_INT_PFAULT_DONE:
inti->ext.ext_params2 = s390int->parm64;
break;
case KVM_S390_MCHK:
- VM_EVENT(kvm, 5, "inject: machine check parm64:%llx",
+ VM_EVENT(kvm, 3, "inject: machine check mcic 0x%llx",
s390int->parm64);
inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
inti->mchk.mcic = s390int->parm64;
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
- if (inti->type & IOINT_AI_MASK)
+ if (inti->type & KVM_S390_INT_IO_AI_MASK)
VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
else
VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
@@ -1520,8 +1514,6 @@ static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
switch (irq->type) {
case KVM_S390_PROGRAM_INT:
- VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
- irq->u.pgm.code);
rc = __inject_prog(vcpu, irq);
break;
case KVM_S390_SIGP_SET_PREFIX:
diff --git a/kernel/arch/s390/kvm/kvm-s390.c b/kernel/arch/s390/kvm/kvm-s390.c
index 8cd8e7b28..575dc123b 100644
--- a/kernel/arch/s390/kvm/kvm-s390.c
+++ b/kernel/arch/s390/kvm/kvm-s390.c
@@ -28,6 +28,7 @@
#include <linux/vmalloc.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
+#include <asm/etr.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
@@ -36,6 +37,10 @@
#include "kvm-s390.h"
#include "gaccess.h"
+#define KMSG_COMPONENT "kvm-s390"
+#undef pr_fmt
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
@@ -58,6 +63,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
+ { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
@@ -104,13 +110,16 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "diagnose_10", VCPU_STAT(diagnose_10) },
{ "diagnose_44", VCPU_STAT(diagnose_44) },
{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
+ { "diagnose_258", VCPU_STAT(diagnose_258) },
+ { "diagnose_308", VCPU_STAT(diagnose_308) },
+ { "diagnose_500", VCPU_STAT(diagnose_500) },
{ NULL }
};
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
0xffe6fffbfcfdfc40UL,
- 0x005c800000000000UL,
+ 0x005e800000000000UL,
};
unsigned long kvm_s390_fac_list_mask_size(void)
@@ -120,6 +129,7 @@ unsigned long kvm_s390_fac_list_mask_size(void)
}
static struct gmap_notifier gmap_notifier;
+debug_info_t *kvm_s390_dbf;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
@@ -130,24 +140,69 @@ int kvm_arch_hardware_enable(void)
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
+/*
+ * This callback is executed during stop_machine(). All CPUs are therefore
+ * temporarily stopped. In order not to change guest behavior, we have to
+ * disable preemption whenever we touch the epoch of kvm and the VCPUs,
+ * so a CPU won't be stopped while calculating with the epoch.
+ */
+static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
+ void *v)
+{
+ struct kvm *kvm;
+ struct kvm_vcpu *vcpu;
+ int i;
+ unsigned long long *delta = v;
+
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ kvm->arch.epoch -= *delta;
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ vcpu->arch.sie_block->epoch -= *delta;
+ }
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block kvm_clock_notifier = {
+ .notifier_call = kvm_clock_sync,
+};
+
int kvm_arch_hardware_setup(void)
{
gmap_notifier.notifier_call = kvm_gmap_notifier;
gmap_register_ipte_notifier(&gmap_notifier);
+ atomic_notifier_chain_register(&s390_epoch_delta_notifier,
+ &kvm_clock_notifier);
return 0;
}
void kvm_arch_hardware_unsetup(void)
{
gmap_unregister_ipte_notifier(&gmap_notifier);
+ atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
+ &kvm_clock_notifier);
}
int kvm_arch_init(void *opaque)
{
+ kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
+ if (!kvm_s390_dbf)
+ return -ENOMEM;
+
+ if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
+ debug_unregister(kvm_s390_dbf);
+ return -ENOMEM;
+ }
+
/* Register floating interrupt controller interface. */
return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}
+void kvm_arch_exit(void)
+{
+ debug_unregister(kvm_s390_dbf);
+}
+
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
@@ -236,6 +291,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
{
int r;
unsigned long n;
+ struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int is_dirty = 0;
@@ -245,7 +301,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
if (log->slot >= KVM_USER_MEM_SLOTS)
goto out;
- memslot = id_to_memslot(kvm->memslots, log->slot);
+ slots = kvm_memslots(kvm);
+ memslot = id_to_memslot(slots, log->slot);
r = -ENOENT;
if (!memslot->dirty_bitmap)
goto out;
@@ -275,22 +332,31 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
switch (cap->cap) {
case KVM_CAP_S390_IRQCHIP:
+ VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
kvm->arch.use_irqchip = 1;
r = 0;
break;
case KVM_CAP_S390_USER_SIGP:
+ VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
kvm->arch.user_sigp = 1;
r = 0;
break;
case KVM_CAP_S390_VECTOR_REGISTERS:
- if (MACHINE_HAS_VX) {
+ mutex_lock(&kvm->lock);
+ if (atomic_read(&kvm->online_vcpus)) {
+ r = -EBUSY;
+ } else if (MACHINE_HAS_VX) {
set_kvm_facility(kvm->arch.model.fac->mask, 129);
set_kvm_facility(kvm->arch.model.fac->list, 129);
r = 0;
} else
r = -EINVAL;
+ mutex_unlock(&kvm->lock);
+ VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
+ r ? "(not available)" : "(success)");
break;
case KVM_CAP_S390_USER_STSI:
+ VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
kvm->arch.user_stsi = 1;
r = 0;
break;
@@ -308,6 +374,8 @@ static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *att
switch (attr->attr) {
case KVM_S390_VM_MEM_LIMIT_SIZE:
ret = 0;
+ VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
+ kvm->arch.gmap->asce_end);
if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
ret = -EFAULT;
break;
@@ -324,7 +392,13 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
unsigned int idx;
switch (attr->attr) {
case KVM_S390_VM_MEM_ENABLE_CMMA:
+ /* enable CMMA only for z10 and later (EDAT_1) */
+ ret = -EINVAL;
+ if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
+ break;
+
ret = -EBUSY;
+ VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
mutex_lock(&kvm->lock);
if (atomic_read(&kvm->online_vcpus) == 0) {
kvm->arch.use_cmma = 1;
@@ -333,6 +407,11 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
mutex_unlock(&kvm->lock);
break;
case KVM_S390_VM_MEM_CLR_CMMA:
+ ret = -EINVAL;
+ if (!kvm->arch.use_cmma)
+ break;
+
+ VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
mutex_lock(&kvm->lock);
idx = srcu_read_lock(&kvm->srcu);
s390_reset_cmma(kvm->arch.gmap->mm);
@@ -368,6 +447,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
}
}
mutex_unlock(&kvm->lock);
+ VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
break;
}
default:
@@ -394,22 +474,26 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
kvm->arch.crypto.crycb->aes_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
kvm->arch.crypto.aes_kw = 1;
+ VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
break;
case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
get_random_bytes(
kvm->arch.crypto.crycb->dea_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
kvm->arch.crypto.dea_kw = 1;
+ VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
break;
case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
kvm->arch.crypto.aes_kw = 0;
memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+ VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
break;
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
kvm->arch.crypto.dea_kw = 0;
memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
+ VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
break;
default:
mutex_unlock(&kvm->lock);
@@ -434,31 +518,20 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
if (gtod_high != 0)
return -EINVAL;
+ VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
return 0;
}
static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
- struct kvm_vcpu *cur_vcpu;
- unsigned int vcpu_idx;
- u64 host_tod, gtod;
- int r;
+ u64 gtod;
if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
return -EFAULT;
- r = store_tod_clock(&host_tod);
- if (r)
- return r;
-
- mutex_lock(&kvm->lock);
- kvm->arch.epoch = gtod - host_tod;
- kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) {
- cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
- exit_sie(cur_vcpu);
- }
- mutex_unlock(&kvm->lock);
+ kvm_s390_set_tod_clock(kvm, gtod);
+ VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
return 0;
}
@@ -490,22 +563,19 @@ static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
if (copy_to_user((void __user *)attr->addr, &gtod_high,
sizeof(gtod_high)))
return -EFAULT;
+ VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
return 0;
}
static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
- u64 host_tod, gtod;
- int r;
-
- r = store_tod_clock(&host_tod);
- if (r)
- return r;
+ u64 gtod;
- gtod = host_tod + kvm->arch.epoch;
+ gtod = kvm_s390_get_tod_clock_fast(kvm);
if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
return -EFAULT;
+ VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
return 0;
}
@@ -604,7 +674,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
goto out;
}
get_cpu_id((struct cpuid *) &mach->cpuid);
- mach->ibc = sclp_get_ibc();
+ mach->ibc = sclp.ibc;
memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);
memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
@@ -815,7 +885,9 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
}
/* Enable storage key handling for the guest */
- s390_enable_skey();
+ r = s390_enable_skey();
+ if (r)
+ goto out;
for (i = 0; i < args->count; i++) {
hva = gfn_to_hva(kvm, args->start_gfn + i);
@@ -873,8 +945,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (kvm->arch.use_irqchip) {
/* Set up dummy routing. */
memset(&routing, 0, sizeof(routing));
- kvm_set_irq_routing(kvm, &routing, 0, 0);
- r = 0;
+ r = kvm_set_irq_routing(kvm, &routing, 0, 0);
}
break;
}
@@ -1031,13 +1102,15 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!kvm->arch.sca)
goto out_err;
spin_lock(&kvm_lock);
- sca_offset = (sca_offset + 16) & 0x7f0;
+ sca_offset += 16;
+ if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
+ sca_offset = 0;
kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
spin_unlock(&kvm_lock);
sprintf(debug_name, "kvm-%u", current->pid);
- kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
+ kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
if (!kvm->arch.dbf)
goto out_err;
@@ -1068,7 +1141,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
S390_ARCH_FAC_LIST_SIZE_BYTE);
kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
- kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff;
+ kvm->arch.model.ibc = sclp.ibc & 0x0fff;
if (kvm_s390_crypto_init(kvm) < 0)
goto out_err;
@@ -1080,7 +1153,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
mutex_init(&kvm->arch.ipte_mutex);
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
- VM_EVENT(kvm, 3, "%s", "vm created");
+ VM_EVENT(kvm, 3, "vm created with type %lu", type);
if (type & KVM_VM_S390_UCONTROL) {
kvm->arch.gmap = NULL;
@@ -1097,6 +1170,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.epoch = 0;
spin_lock_init(&kvm->arch.start_stop_lock);
+ KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
return 0;
out_err:
@@ -1104,6 +1178,7 @@ out_err:
free_page((unsigned long)kvm->arch.model.fac);
debug_unregister(kvm->arch.dbf);
free_page((unsigned long)(kvm->arch.sca));
+ KVM_EVENT(3, "creation of vm failed: %d", rc);
return rc;
}
@@ -1125,7 +1200,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
if (kvm_is_ucontrol(vcpu->kvm))
gmap_free(vcpu->arch.gmap);
- if (kvm_s390_cmma_enabled(vcpu->kvm))
+ if (vcpu->kvm->arch.use_cmma)
kvm_s390_vcpu_unsetup_cmma(vcpu);
free_page((unsigned long)(vcpu->arch.sie_block));
@@ -1160,6 +1235,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
gmap_free(kvm->arch.gmap);
kvm_s390_destroy_adapters(kvm);
kvm_s390_clear_float_irqs(kvm);
+ KVM_EVENT(3, "vm 0x%p destroyed", kvm);
}
/* Section: vcpu related */
@@ -1194,41 +1270,40 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
- save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
- if (test_kvm_facility(vcpu->kvm, 129))
- save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
- else
- save_fp_regs(vcpu->arch.host_fpregs.fprs);
+ /* Save host register state */
+ save_fpu_regs();
+ vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
+ vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
+
+ /* Depending on MACHINE_HAS_VX, data stored to vrs either
+ * has vector register or floating point register format.
+ */
+ current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+ current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
+ if (test_fp_ctl(current->thread.fpu.fpc))
+ /* User space provided an invalid FPC, let's clear it */
+ current->thread.fpu.fpc = 0;
+
save_access_regs(vcpu->arch.host_acrs);
- if (test_kvm_facility(vcpu->kvm, 129)) {
- restore_fp_ctl(&vcpu->run->s.regs.fpc);
- restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
- } else {
- restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
- restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
- }
restore_access_regs(vcpu->run->s.regs.acrs);
gmap_enable(vcpu->arch.gmap);
- atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
- atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
gmap_disable(vcpu->arch.gmap);
- if (test_kvm_facility(vcpu->kvm, 129)) {
- save_fp_ctl(&vcpu->run->s.regs.fpc);
- save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
- } else {
- save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
- save_fp_regs(vcpu->arch.guest_fpregs.fprs);
- }
+
+ /* Save guest register state */
+ save_fpu_regs();
+ vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
+
+ /* Restore host register state */
+ current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
+ current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
+
save_access_regs(vcpu->run->s.regs.acrs);
- restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
- if (test_kvm_facility(vcpu->kvm, 129))
- restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
- else
- restore_fp_regs(vcpu->arch.host_fpregs.fprs);
restore_access_regs(vcpu->arch.host_acrs);
}
@@ -1244,8 +1319,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
vcpu->arch.sie_block->gcr[0] = 0xE0UL;
vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
- vcpu->arch.guest_fpregs.fpc = 0;
- asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
+ /* make sure the new fpc will be lazily loaded */
+ save_fpu_regs();
+ current->thread.fpu.fpc = 0;
vcpu->arch.sie_block->gbea = 1;
vcpu->arch.sie_block->pp = 0;
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
@@ -1258,7 +1334,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
mutex_lock(&vcpu->kvm->lock);
+ preempt_disable();
vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
+ preempt_enable();
mutex_unlock(&vcpu->kvm->lock);
if (!kvm_is_ucontrol(vcpu->kvm))
vcpu->arch.gmap = vcpu->kvm->arch.gmap;
@@ -1311,8 +1389,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
CPUSTAT_SM |
- CPUSTAT_STOPPED |
- CPUSTAT_GED);
+ CPUSTAT_STOPPED);
+
+ if (test_kvm_facility(vcpu->kvm, 78))
+ atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
+ else if (test_kvm_facility(vcpu->kvm, 8))
+ atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
+
kvm_s390_vcpu_setup_model(vcpu);
vcpu->arch.sie_block->ecb = 6;
@@ -1321,9 +1404,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->ecb2 = 8;
vcpu->arch.sie_block->eca = 0xC1002000U;
- if (sclp_has_siif())
+ if (sclp.has_siif)
vcpu->arch.sie_block->eca |= 1;
- if (sclp_has_sigpif())
+ if (sclp.has_sigpif)
vcpu->arch.sie_block->eca |= 0x10000000U;
if (test_kvm_facility(vcpu->kvm, 129)) {
vcpu->arch.sie_block->eca |= 0x00020000;
@@ -1331,7 +1414,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
}
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
- if (kvm_s390_cmma_enabled(vcpu->kvm)) {
+ if (vcpu->kvm->arch.use_cmma) {
rc = kvm_s390_vcpu_setup_cmma(vcpu);
if (rc)
return rc;
@@ -1366,7 +1449,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.sie_block = &sie_page->sie_block;
vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
- vcpu->arch.host_vregs = &sie_page->vregs;
vcpu->arch.sie_block->icpua = id;
if (!kvm_is_ucontrol(kvm)) {
@@ -1409,14 +1491,26 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
return kvm_s390_vcpu_has_irq(vcpu, 0);
}
-void s390_vcpu_block(struct kvm_vcpu *vcpu)
+void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
- atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+ atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+ exit_sie(vcpu);
}
-void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
+void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
- atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+ atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+}
+
+static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
+{
+ atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+ exit_sie(vcpu);
+}
+
+static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
+{
+ atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
@@ -1425,16 +1519,16 @@ void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
* return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
{
- atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
+ atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
cpu_relax();
}
-/* Kick a guest cpu out of SIE and prevent SIE-reentry */
-void exit_sie_sync(struct kvm_vcpu *vcpu)
+/* Kick a guest cpu out of SIE to process a request synchronously */
+void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
- s390_vcpu_block(vcpu);
- exit_sie(vcpu);
+ kvm_make_request(req, vcpu);
+ kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
@@ -1447,8 +1541,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
/* match against both prefix pages */
if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
- kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
- exit_sie_sync(vcpu);
+ kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
}
}
}
@@ -1597,19 +1690,27 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
+ /* make sure the new values will be lazily loaded */
+ save_fpu_regs();
if (test_fp_ctl(fpu->fpc))
return -EINVAL;
- memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
- vcpu->arch.guest_fpregs.fpc = fpu->fpc;
- restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
- restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+ current->thread.fpu.fpc = fpu->fpc;
+ if (MACHINE_HAS_VX)
+ convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
+ else
+ memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
return 0;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
- fpu->fpc = vcpu->arch.guest_fpregs.fpc;
+ /* make sure we have the latest values */
+ save_fpu_regs();
+ if (MACHINE_HAS_VX)
+ convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
+ else
+ memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
+ fpu->fpc = current->thread.fpu.fpc;
return 0;
}
@@ -1650,19 +1751,19 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
if (dbg->control & KVM_GUESTDBG_ENABLE) {
vcpu->guest_debug = dbg->control;
/* enforce guest PER */
- atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+ atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
rc = kvm_s390_import_bp_data(vcpu, dbg);
} else {
- atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+ atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
vcpu->arch.guestdbg.last_bp = 0;
}
if (rc) {
vcpu->guest_debug = 0;
kvm_s390_clear_bp_data(vcpu);
- atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+ atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
}
return rc;
@@ -1701,18 +1802,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
return rc;
}
-bool kvm_s390_cmma_enabled(struct kvm *kvm)
-{
- if (!MACHINE_IS_LPAR)
- return false;
- /* only enable for z10 and later */
- if (!MACHINE_HAS_EDAT1)
- return false;
- if (!kvm->arch.use_cmma)
- return false;
- return true;
-}
-
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
@@ -1721,7 +1810,9 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu)
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
- s390_vcpu_unblock(vcpu);
+ kvm_s390_vcpu_request_handled(vcpu);
+ if (!vcpu->requests)
+ return 0;
/*
* We use MMU_RELOAD just to re-arm the ipte notifier for the
* guest prefix page. gmap_ipte_notify will wait on the ptl lock.
@@ -1747,7 +1838,7 @@ retry:
if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
if (!ibs_enabled(vcpu)) {
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
- atomic_set_mask(CPUSTAT_IBS,
+ atomic_or(CPUSTAT_IBS,
&vcpu->arch.sie_block->cpuflags);
}
goto retry;
@@ -1756,7 +1847,7 @@ retry:
if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
if (ibs_enabled(vcpu)) {
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
- atomic_clear_mask(CPUSTAT_IBS,
+ atomic_andnot(CPUSTAT_IBS,
&vcpu->arch.sie_block->cpuflags);
}
goto retry;
@@ -1768,6 +1859,22 @@ retry:
return 0;
}
+void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ mutex_lock(&kvm->lock);
+ preempt_disable();
+ kvm->arch.epoch = tod - get_tod_clock();
+ kvm_s390_vcpu_block_all(kvm);
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ vcpu->arch.sie_block->epoch = kvm->arch.epoch;
+ kvm_s390_vcpu_unblock_all(kvm);
+ preempt_enable();
+ mutex_unlock(&kvm->lock);
+}
+
/**
* kvm_arch_fault_in_page - fault-in guest page if necessary
* @vcpu: The corresponding virtual cpu
@@ -1993,12 +2100,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
* As PF_VCPU will be used in fault handler, between
* guest_enter and guest_exit should be no uaccess.
*/
- preempt_disable();
- kvm_guest_enter();
- preempt_enable();
+ local_irq_disable();
+ __kvm_guest_enter();
+ local_irq_enable();
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs);
- kvm_guest_exit();
+ local_irq_disable();
+ __kvm_guest_exit();
+ local_irq_enable();
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
rc = vcpu_post_run(vcpu, exit_reason);
@@ -2068,7 +2177,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
kvm_s390_vcpu_start(vcpu);
} else if (is_vcpu_stopped(vcpu)) {
- pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n",
+ pr_err_ratelimited("can't run stopped vcpu %d\n",
vcpu->vcpu_id);
return -EINVAL;
}
@@ -2121,41 +2230,50 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
unsigned char archmode = 1;
+ freg_t fprs[NUM_FPRS];
unsigned int px;
u64 clkcomp;
int rc;
+ px = kvm_s390_get_prefix(vcpu);
if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
if (write_guest_abs(vcpu, 163, &archmode, 1))
return -EFAULT;
- gpa = SAVE_AREA_BASE;
+ gpa = 0;
} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
if (write_guest_real(vcpu, 163, &archmode, 1))
return -EFAULT;
- gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
+ gpa = px;
+ } else
+ gpa -= __LC_FPREGS_SAVE_AREA;
+
+ /* manually convert vector registers if necessary */
+ if (MACHINE_HAS_VX) {
+ convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
+ rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+ fprs, 128);
+ } else {
+ rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+ vcpu->run->s.regs.vrs, 128);
}
- rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
- vcpu->arch.guest_fpregs.fprs, 128);
- rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
+ rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
vcpu->run->s.regs.gprs, 128);
- rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
+ rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
&vcpu->arch.sie_block->gpsw, 16);
- px = kvm_s390_get_prefix(vcpu);
- rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
+ rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
&px, 4);
- rc |= write_guest_abs(vcpu,
- gpa + offsetof(struct save_area, fp_ctrl_reg),
- &vcpu->arch.guest_fpregs.fpc, 4);
- rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
+ rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
+ &vcpu->run->s.regs.fpc, 4);
+ rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
&vcpu->arch.sie_block->todpr, 4);
- rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
+ rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
&vcpu->arch.sie_block->cputm, 8);
clkcomp = vcpu->arch.sie_block->ckc >> 8;
- rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
+ rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
&clkcomp, 8);
- rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
+ rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
&vcpu->run->s.regs.acrs, 64);
- rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
+ rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
&vcpu->arch.sie_block->gcr, 128);
return rc ? -EFAULT : 0;
}
@@ -2167,8 +2285,8 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
* copying in vcpu load/put. Lets update our copies before we save
* it into the save area
*/
- save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
- save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+ save_fpu_regs();
+ vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
save_access_regs(vcpu->run->s.regs.acrs);
return kvm_s390_store_status_unloaded(vcpu, addr);
@@ -2195,10 +2313,13 @@ int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
/*
* The guest VXRS are in the host VXRs due to the lazy
- * copying in vcpu load/put. Let's update our copies before we save
- * it into the save area.
+ * copying in vcpu load/put. We can simply call save_fpu_regs()
+ * to save the current register state because we are in the
+ * middle of a load/put cycle.
+ *
+ * Let's update our copies before we save it into the save area.
*/
- save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+ save_fpu_regs();
return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
}
@@ -2206,8 +2327,7 @@ int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
- kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
- exit_sie_sync(vcpu);
+ kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}
static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
@@ -2223,8 +2343,7 @@ static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
- kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
- exit_sie_sync(vcpu);
+ kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
@@ -2256,7 +2375,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
__disable_ibs_on_all_vcpus(vcpu->kvm);
}
- atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+ atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
/*
* Another VCPU might have used IBS while we were offline.
* Let's play safe and flush the VCPU at startup.
@@ -2282,7 +2401,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
kvm_s390_clear_stop_irq(vcpu);
- atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+ atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
__disable_ibs_on_vcpu(vcpu);
for (i = 0; i < online_vcpus; i++) {
@@ -2316,6 +2435,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
case KVM_CAP_S390_CSS_SUPPORT:
if (!vcpu->kvm->arch.css_support) {
vcpu->kvm->arch.css_support = 1;
+ VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
trace_kvm_s390_enable_css(vcpu->kvm);
}
r = 0;
@@ -2563,7 +2683,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
/* A few sanity checks. We can have memory slots which have to be
@@ -2581,8 +2701,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
int rc;
@@ -2601,7 +2722,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
mem->guest_phys_addr, mem->memory_size);
if (rc)
- printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
+ pr_warn("failed to commit memory region\n");
return;
}
diff --git a/kernel/arch/s390/kvm/kvm-s390.h b/kernel/arch/s390/kvm/kvm-s390.h
index ca108b90a..1e70e00d3 100644
--- a/kernel/arch/s390/kvm/kvm-s390.h
+++ b/kernel/arch/s390/kvm/kvm-s390.h
@@ -27,6 +27,13 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
#define TDB_FORMAT1 1
#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
+extern debug_info_t *kvm_s390_dbf;
+#define KVM_EVENT(d_loglevel, d_string, d_args...)\
+do { \
+ debug_sprintf_event(kvm_s390_dbf, d_loglevel, d_string "\n", \
+ d_args); \
+} while (0)
+
#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
do { \
debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
@@ -65,6 +72,8 @@ static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu)
static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
{
+ VCPU_EVENT(vcpu, 3, "set prefix of cpu %03u to 0x%x", vcpu->vcpu_id,
+ prefix);
vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT;
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
@@ -166,6 +175,7 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
return kvm->arch.user_cpu_state_ctrl != 0;
}
+/* implemented in interrupt.c */
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
@@ -176,7 +186,25 @@ int __must_check kvm_s390_inject_vm(struct kvm *kvm,
struct kvm_s390_interrupt *s390int);
int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
struct kvm_s390_irq *irq);
-int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
+static inline int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
+ struct kvm_s390_pgm_info *pgm_info)
+{
+ struct kvm_s390_irq irq = {
+ .type = KVM_S390_PROGRAM_INT,
+ .u.pgm = *pgm_info,
+ };
+
+ return kvm_s390_inject_vcpu(vcpu, &irq);
+}
+static inline int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+{
+ struct kvm_s390_irq irq = {
+ .type = KVM_S390_PROGRAM_INT,
+ .u.pgm.code = code,
+ };
+
+ return kvm_s390_inject_vcpu(vcpu, &irq);
+}
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
u64 isc_mask, u32 schid);
int kvm_s390_reinject_io_int(struct kvm *kvm,
@@ -203,6 +231,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
/* implemented in kvm-s390.c */
+void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod);
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
@@ -211,22 +240,46 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
-void s390_vcpu_block(struct kvm_vcpu *vcpu);
-void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu);
void exit_sie(struct kvm_vcpu *vcpu);
-void exit_sie_sync(struct kvm_vcpu *vcpu);
+void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu);
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
-/* is cmma enabled */
-bool kvm_s390_cmma_enabled(struct kvm *kvm);
unsigned long kvm_s390_fac_list_mask_size(void);
extern unsigned long kvm_s390_fac_list_mask[];
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
-/* implemented in interrupt.c */
-int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
- struct kvm_s390_pgm_info *pgm_info);
+
+static inline void kvm_s390_vcpu_block_all(struct kvm *kvm)
+{
+ int i;
+ struct kvm_vcpu *vcpu;
+
+ WARN_ON(!mutex_is_locked(&kvm->lock));
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_s390_vcpu_block(vcpu);
+}
+
+static inline void kvm_s390_vcpu_unblock_all(struct kvm *kvm)
+{
+ int i;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_s390_vcpu_unblock(vcpu);
+}
+
+static inline u64 kvm_s390_get_tod_clock_fast(struct kvm *kvm)
+{
+ u64 rc;
+
+ preempt_disable();
+ rc = get_tod_clock_fast() + kvm->arch.epoch;
+ preempt_enable();
+ return rc;
+}
/**
* kvm_s390_inject_prog_cond - conditionally inject a program check
diff --git a/kernel/arch/s390/kvm/priv.c b/kernel/arch/s390/kvm/priv.c
index d22d8ee1f..d76b51cb4 100644
--- a/kernel/arch/s390/kvm/priv.c
+++ b/kernel/arch/s390/kvm/priv.c
@@ -33,11 +33,9 @@
/* Handle SCK (SET CLOCK) interception */
static int handle_set_clock(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *cpup;
- s64 hostclk, val;
- int i, rc;
+ int rc;
ar_t ar;
- u64 op2;
+ u64 op2, val;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -49,16 +47,8 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
- if (store_tod_clock(&hostclk)) {
- kvm_s390_set_psw_cc(vcpu, 3);
- return 0;
- }
- val = (val - hostclk) & ~0x3fUL;
-
- mutex_lock(&vcpu->kvm->lock);
- kvm_for_each_vcpu(i, cpup, vcpu->kvm)
- cpup->arch.sie_block->epoch = val;
- mutex_unlock(&vcpu->kvm->lock);
+ VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val);
+ kvm_s390_set_tod_clock(vcpu->kvm, val);
kvm_s390_set_psw_cc(vcpu, 0);
return 0;
@@ -98,8 +88,6 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
kvm_s390_set_prefix(vcpu, address);
-
- VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
trace_kvm_s390_handle_prefix(vcpu, 1, address);
return 0;
}
@@ -129,7 +117,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
- VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
+ VCPU_EVENT(vcpu, 3, "STPX: storing prefix 0x%x into 0x%llx", address, operand2);
trace_kvm_s390_handle_prefix(vcpu, 0, address);
return 0;
}
@@ -155,7 +143,7 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
- VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", ga);
+ VCPU_EVENT(vcpu, 3, "STAP: storing cpu address (%u) to 0x%llx", vcpu_id, ga);
trace_kvm_s390_handle_stap(vcpu, ga);
return 0;
}
@@ -167,6 +155,7 @@ static int __skey_check_enable(struct kvm_vcpu *vcpu)
return rc;
rc = s390_enable_skey();
+ VCPU_EVENT(vcpu, 3, "%s", "enabling storage keys for guest");
trace_kvm_s390_skey_related_inst(vcpu);
vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
return rc;
@@ -370,7 +359,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
&fac, sizeof(fac));
if (rc)
return rc;
- VCPU_EVENT(vcpu, 5, "store facility list value %x", fac);
+ VCPU_EVENT(vcpu, 3, "STFL: store facility list 0x%x", fac);
trace_kvm_s390_handle_stfl(vcpu, fac);
return 0;
}
@@ -468,7 +457,7 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
- VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
+ VCPU_EVENT(vcpu, 3, "STIDP: store cpu id 0x%llx", stidp_data);
return 0;
}
@@ -521,7 +510,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
ar_t ar;
vcpu->stat.instruction_stsi++;
- VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
+ VCPU_EVENT(vcpu, 3, "STSI: fc: %u sel1: %u sel2: %u", fc, sel1, sel2);
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -671,7 +660,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
- if (!MACHINE_HAS_PFMF)
+ if (!test_kvm_facility(vcpu->kvm, 8))
return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
@@ -698,10 +687,14 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
case 0x00001000:
end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
break;
- /* We dont support EDAT2
case 0x00002000:
+ /* only support 2G frame size if EDAT2 is available and we are
+ not in 24-bit addressing mode */
+ if (!test_kvm_facility(vcpu->kvm, 78) ||
+ psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_24BIT)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
end = (start + (1UL << 31)) & ~((1UL << 31) - 1);
- break;*/
+ break;
default:
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
}
@@ -754,10 +747,10 @@ static int handle_essa(struct kvm_vcpu *vcpu)
struct gmap *gmap;
int i;
- VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries);
+ VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries);
gmap = vcpu->arch.gmap;
vcpu->stat.instruction_essa++;
- if (!kvm_s390_cmma_enabled(vcpu->kvm))
+ if (!vcpu->kvm->arch.use_cmma)
return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
@@ -825,7 +818,7 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
if (ga & 3)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+ VCPU_EVENT(vcpu, 4, "LCTL: r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
nr_regs = ((reg3 - reg1) & 0xf) + 1;
@@ -864,7 +857,7 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
if (ga & 3)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- VCPU_EVENT(vcpu, 5, "stctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+ VCPU_EVENT(vcpu, 4, "STCTL r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
reg = reg1;
@@ -898,7 +891,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
if (ga & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+ VCPU_EVENT(vcpu, 4, "LCTLG: r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
nr_regs = ((reg3 - reg1) & 0xf) + 1;
@@ -936,7 +929,7 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
if (ga & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+ VCPU_EVENT(vcpu, 4, "STCTG r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
reg = reg1;
diff --git a/kernel/arch/s390/kvm/sigp.c b/kernel/arch/s390/kvm/sigp.c
index 72e58bd2b..77c22d685 100644
--- a/kernel/arch/s390/kvm/sigp.c
+++ b/kernel/arch/s390/kvm/sigp.c
@@ -205,9 +205,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
*reg &= 0xffffffff00000000UL;
*reg |= SIGP_STATUS_INCORRECT_STATE;
return SIGP_CC_STATUS_STORED;
- } else if (rc == 0) {
- VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x",
- dst_vcpu->vcpu_id, irq.u.prefix.address);
}
return rc;
@@ -294,12 +291,8 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
u16 cpu_addr, u32 parameter, u64 *status_reg)
{
int rc;
- struct kvm_vcpu *dst_vcpu;
-
- if (cpu_addr >= KVM_MAX_VCPUS)
- return SIGP_CC_NOT_OPERATIONAL;
+ struct kvm_vcpu *dst_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
- dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
@@ -371,7 +364,8 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
return rc;
}
-static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code)
+static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code,
+ u16 cpu_addr)
{
if (!vcpu->kvm->arch.user_sigp)
return 0;
@@ -414,9 +408,8 @@ static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code)
default:
vcpu->stat.instruction_sigp_unknown++;
}
-
- VCPU_EVENT(vcpu, 4, "sigp order %u: completely handled in user space",
- order_code);
+ VCPU_EVENT(vcpu, 3, "SIGP: order %u for CPU %d handled in userspace",
+ order_code, cpu_addr);
return 1;
}
@@ -435,7 +428,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
- if (handle_sigp_order_in_user_space(vcpu, order_code))
+ if (handle_sigp_order_in_user_space(vcpu, order_code, cpu_addr))
return -EOPNOTSUPP;
if (r1 % 2)
@@ -481,7 +474,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
if (order_code == SIGP_EXTERNAL_CALL) {
- dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
BUG_ON(dest_vcpu == NULL);
kvm_s390_vcpu_wakeup(dest_vcpu);
diff --git a/kernel/arch/s390/kvm/trace-s390.h b/kernel/arch/s390/kvm/trace-s390.h
index 3208d33a4..cc1d6c683 100644
--- a/kernel/arch/s390/kvm/trace-s390.h
+++ b/kernel/arch/s390/kvm/trace-s390.h
@@ -105,11 +105,22 @@ TRACE_EVENT(kvm_s390_vcpu_start_stop,
{KVM_S390_PROGRAM_INT, "program interrupt"}, \
{KVM_S390_SIGP_SET_PREFIX, "sigp set prefix"}, \
{KVM_S390_RESTART, "sigp restart"}, \
+ {KVM_S390_INT_PFAULT_INIT, "pfault init"}, \
+ {KVM_S390_INT_PFAULT_DONE, "pfault done"}, \
+ {KVM_S390_MCHK, "machine check"}, \
+ {KVM_S390_INT_CLOCK_COMP, "clock comparator"}, \
+ {KVM_S390_INT_CPU_TIMER, "cpu timer"}, \
{KVM_S390_INT_VIRTIO, "virtio interrupt"}, \
{KVM_S390_INT_SERVICE, "sclp interrupt"}, \
{KVM_S390_INT_EMERGENCY, "sigp emergency"}, \
{KVM_S390_INT_EXTERNAL_CALL, "sigp ext call"}
+#define get_irq_name(__type) \
+ (__type > KVM_S390_INT_IO_MAX ? \
+ __print_symbolic(__type, kvm_s390_int_type) : \
+ (__type & KVM_S390_INT_IO_AI_MASK ? \
+ "adapter I/O interrupt" : "subchannel I/O interrupt"))
+
TRACE_EVENT(kvm_s390_inject_vm,
TP_PROTO(__u64 type, __u32 parm, __u64 parm64, int who),
TP_ARGS(type, parm, parm64, who),
@@ -131,22 +142,19 @@ TRACE_EVENT(kvm_s390_inject_vm,
TP_printk("inject%s: type:%x (%s) parm:%x parm64:%llx",
(__entry->who == 1) ? " (from kernel)" :
(__entry->who == 2) ? " (from user)" : "",
- __entry->inttype,
- __print_symbolic(__entry->inttype, kvm_s390_int_type),
+ __entry->inttype, get_irq_name(__entry->inttype),
__entry->parm, __entry->parm64)
);
TRACE_EVENT(kvm_s390_inject_vcpu,
- TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64, \
- int who),
- TP_ARGS(id, type, parm, parm64, who),
+ TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64),
+ TP_ARGS(id, type, parm, parm64),
TP_STRUCT__entry(
__field(int, id)
__field(__u32, inttype)
__field(__u32, parm)
__field(__u64, parm64)
- __field(int, who)
),
TP_fast_assign(
@@ -154,15 +162,12 @@ TRACE_EVENT(kvm_s390_inject_vcpu,
__entry->inttype = type & 0x00000000ffffffff;
__entry->parm = parm;
__entry->parm64 = parm64;
- __entry->who = who;
),
- TP_printk("inject%s (vcpu %d): type:%x (%s) parm:%x parm64:%llx",
- (__entry->who == 1) ? " (from kernel)" :
- (__entry->who == 2) ? " (from user)" : "",
+ TP_printk("inject (vcpu %d): type:%x (%s) parm:%x parm64:%llx",
__entry->id, __entry->inttype,
- __print_symbolic(__entry->inttype, kvm_s390_int_type),
- __entry->parm, __entry->parm64)
+ get_irq_name(__entry->inttype), __entry->parm,
+ __entry->parm64)
);
/*
@@ -189,8 +194,8 @@ TRACE_EVENT(kvm_s390_deliver_interrupt,
TP_printk("deliver interrupt (vcpu %d): type:%x (%s) " \
"data:%08llx %016llx",
__entry->id, __entry->inttype,
- __print_symbolic(__entry->inttype, kvm_s390_int_type),
- __entry->data0, __entry->data1)
+ get_irq_name(__entry->inttype), __entry->data0,
+ __entry->data1)
);
/*
diff --git a/kernel/arch/s390/lib/delay.c b/kernel/arch/s390/lib/delay.c
index 16dc42d83..501dcd4ca 100644
--- a/kernel/arch/s390/lib/delay.c
+++ b/kernel/arch/s390/lib/delay.c
@@ -12,8 +12,10 @@
#include <linux/module.h>
#include <linux/irqflags.h>
#include <linux/interrupt.h>
+#include <linux/irq.h>
#include <asm/vtimer.h>
#include <asm/div64.h>
+#include <asm/idle.h>
void __delay(unsigned long loops)
{
@@ -26,29 +28,26 @@ void __delay(unsigned long loops)
*/
asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
}
+EXPORT_SYMBOL(__delay);
static void __udelay_disabled(unsigned long long usecs)
{
- unsigned long cr0, cr6, new;
- u64 clock_saved, end;
+ unsigned long cr0, cr0_new, psw_mask;
+ struct s390_idle_data idle;
+ u64 end;
end = get_tod_clock() + (usecs << 12);
- clock_saved = local_tick_disable();
__ctl_store(cr0, 0, 0);
- __ctl_store(cr6, 6, 6);
- new = (cr0 & 0xffff00e0) | 0x00000800;
- __ctl_load(new , 0, 0);
- new = 0;
- __ctl_load(new, 6, 6);
- lockdep_off();
- do {
- set_clock_comparator(end);
- enabled_wait();
- } while (get_tod_clock_fast() < end);
- lockdep_on();
+ cr0_new = cr0 & ~CR0_IRQ_SUBCLASS_MASK;
+ cr0_new |= (1UL << (63 - 52)); /* enable clock comparator irq */
+ __ctl_load(cr0_new, 0, 0);
+ psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT;
+ set_clock_comparator(end);
+ set_cpu_flag(CIF_IGNORE_IRQ);
+ psw_idle(&idle, psw_mask);
+ clear_cpu_flag(CIF_IGNORE_IRQ);
+ set_clock_comparator(S390_lowcore.clock_comparator);
__ctl_load(cr0, 0, 0);
- __ctl_load(cr6, 6, 6);
- local_tick_enable(clock_saved);
}
static void __udelay_enabled(unsigned long long usecs)
diff --git a/kernel/arch/s390/lib/find.c b/kernel/arch/s390/lib/find.c
index 922003c1b..d90b9245e 100644
--- a/kernel/arch/s390/lib/find.c
+++ b/kernel/arch/s390/lib/find.c
@@ -1,10 +1,8 @@
/*
* MSB0 numbered special bitops handling.
*
- * On s390x the bits are numbered:
+ * The bits are numbered:
* |0..............63|64............127|128...........191|192...........255|
- * and on s390:
- * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
*
* The reason for this bit numbering is the fact that the hardware sets bits
* in a bitmap starting at bit 0 (MSB) and we don't want to scan the bitmap
diff --git a/kernel/arch/s390/lib/spinlock.c b/kernel/arch/s390/lib/spinlock.c
index d6c9991f7..427aa44b3 100644
--- a/kernel/arch/s390/lib/spinlock.c
+++ b/kernel/arch/s390/lib/spinlock.c
@@ -197,7 +197,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev)
}
old = ACCESS_ONCE(rw->lock);
owner = ACCESS_ONCE(rw->owner);
- smp_rmb();
+ smp_mb();
if ((int) old >= 0) {
prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
old = prev;
@@ -231,7 +231,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw)
_raw_compare_and_swap(&rw->lock, old, old | 0x80000000))
prev = old;
else
- smp_rmb();
+ smp_mb();
if ((old & 0x7fffffff) == 0 && (int) prev >= 0)
break;
if (MACHINE_HAS_CAD)
diff --git a/kernel/arch/s390/lib/uaccess.c b/kernel/arch/s390/lib/uaccess.c
index 4614d415b..ae4de559e 100644
--- a/kernel/arch/s390/lib/uaccess.c
+++ b/kernel/arch/s390/lib/uaccess.c
@@ -15,7 +15,7 @@
#include <asm/mmu_context.h>
#include <asm/facility.h>
-static struct static_key have_mvcos = STATIC_KEY_INIT_FALSE;
+static DEFINE_STATIC_KEY_FALSE(have_mvcos);
static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
unsigned long size)
@@ -104,7 +104,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n)
{
- if (static_key_false(&have_mvcos))
+ if (static_branch_likely(&have_mvcos))
return copy_from_user_mvcos(to, from, n);
return copy_from_user_mvcp(to, from, n);
}
@@ -177,7 +177,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
{
- if (static_key_false(&have_mvcos))
+ if (static_branch_likely(&have_mvcos))
return copy_to_user_mvcos(to, from, n);
return copy_to_user_mvcs(to, from, n);
}
@@ -240,7 +240,7 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user
unsigned long __copy_in_user(void __user *to, const void __user *from, unsigned long n)
{
- if (static_key_false(&have_mvcos))
+ if (static_branch_likely(&have_mvcos))
return copy_in_user_mvcos(to, from, n);
return copy_in_user_mvc(to, from, n);
}
@@ -312,7 +312,7 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
unsigned long __clear_user(void __user *to, unsigned long size)
{
- if (static_key_false(&have_mvcos))
+ if (static_branch_likely(&have_mvcos))
return clear_user_mvcos(to, size);
return clear_user_xc(to, size);
}
@@ -370,23 +370,10 @@ long __strncpy_from_user(char *dst, const char __user *src, long size)
}
EXPORT_SYMBOL(__strncpy_from_user);
-/*
- * The "old" uaccess variant without mvcos can be enforced with the
- * uaccess_primary kernel parameter. This is mainly for debugging purposes.
- */
-static int uaccess_primary __initdata;
-
-static int __init parse_uaccess_pt(char *__unused)
-{
- uaccess_primary = 1;
- return 0;
-}
-early_param("uaccess_primary", parse_uaccess_pt);
-
static int __init uaccess_init(void)
{
- if (!uaccess_primary && test_facility(27))
- static_key_slow_inc(&have_mvcos);
+ if (test_facility(27))
+ static_branch_enable(&have_mvcos);
return 0;
}
early_initcall(uaccess_init);
diff --git a/kernel/arch/s390/mm/extable.c b/kernel/arch/s390/mm/extable.c
index 4d1ee8886..18c8b819b 100644
--- a/kernel/arch/s390/mm/extable.c
+++ b/kernel/arch/s390/mm/extable.c
@@ -52,12 +52,16 @@ void sort_extable(struct exception_table_entry *start,
int i;
/* Normalize entries to being relative to the start of the section */
- for (p = start, i = 0; p < finish; p++, i += 8)
+ for (p = start, i = 0; p < finish; p++, i += 8) {
p->insn += i;
+ p->fixup += i + 4;
+ }
sort(start, finish - start, sizeof(*start), cmp_ex, NULL);
/* Denormalize all entries */
- for (p = start, i = 0; p < finish; p++, i += 8)
+ for (p = start, i = 0; p < finish; p++, i += 8) {
p->insn -= i;
+ p->fixup -= i + 4;
+ }
}
#ifdef CONFIG_MODULES
diff --git a/kernel/arch/s390/mm/extmem.c b/kernel/arch/s390/mm/extmem.c
index 23c496957..18fccc303 100644
--- a/kernel/arch/s390/mm/extmem.c
+++ b/kernel/arch/s390/mm/extmem.c
@@ -18,6 +18,7 @@
#include <linux/bootmem.h>
#include <linux/ctype.h>
#include <linux/ioport.h>
+#include <asm/diag.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/ebcdic.h>
@@ -112,6 +113,7 @@ dcss_set_subcodes(void)
ry = DCSS_FINDSEGX;
strcpy(name, "dummy");
+ diag_stat_inc(DIAG_STAT_X064);
asm volatile(
" diag %0,%1,0x64\n"
"0: ipm %2\n"
@@ -205,6 +207,7 @@ dcss_diag(int *func, void *parameter,
ry = (unsigned long) *func;
/* 64-bit Diag x'64' new subcode, keep in 64-bit addressing mode */
+ diag_stat_inc(DIAG_STAT_X064);
if (*func > DCSS_SEGEXT)
asm volatile(
" diag %0,%1,0x64\n"
diff --git a/kernel/arch/s390/mm/fault.c b/kernel/arch/s390/mm/fault.c
index 4c8f5d7f9..ec1a30d0d 100644
--- a/kernel/arch/s390/mm/fault.c
+++ b/kernel/arch/s390/mm/fault.c
@@ -30,6 +30,7 @@
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
#include <asm/asm-offsets.h>
+#include <asm/diag.h>
#include <asm/pgtable.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
@@ -589,7 +590,7 @@ int pfault_init(void)
.reffcode = 0,
.refdwlen = 5,
.refversn = 2,
- .refgaddr = __LC_CURRENT_PID,
+ .refgaddr = __LC_LPP,
.refselmk = 1ULL << 48,
.refcmpmk = 1ULL << 48,
.reserved = __PF_RES_FIELD };
@@ -597,6 +598,7 @@ int pfault_init(void)
if (pfault_disable)
return -1;
+ diag_stat_inc(DIAG_STAT_X258);
asm volatile(
" diag %1,%0,0x258\n"
"0: j 2f\n"
@@ -618,6 +620,7 @@ void pfault_fini(void)
if (pfault_disable)
return;
+ diag_stat_inc(DIAG_STAT_X258);
asm volatile(
" diag %0,0,0x258\n"
"0:\n"
@@ -646,7 +649,7 @@ static void pfault_interrupt(struct ext_code ext_code,
return;
inc_irq_stat(IRQEXT_PFL);
/* Get the token (= pid of the affected task). */
- pid = sizeof(void *) == 4 ? param32 : param64;
+ pid = param64 & LPP_PFAULT_PID_MASK;
rcu_read_lock();
tsk = find_task_by_pid_ns(pid, &init_pid_ns);
if (tsk)
diff --git a/kernel/arch/s390/mm/gup.c b/kernel/arch/s390/mm/gup.c
index 1eb41bb30..12bbf0e84 100644
--- a/kernel/arch/s390/mm/gup.c
+++ b/kernel/arch/s390/mm/gup.c
@@ -30,6 +30,9 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
do {
pte = *ptep;
barrier();
+ /* Similar to the PMD case, NUMA hinting must take slow path */
+ if (pte_protnone(pte))
+ return 0;
if ((pte_val(pte) & mask) != 0)
return 0;
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
@@ -125,6 +128,13 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
if (pmd_none(pmd) || pmd_trans_splitting(pmd))
return 0;
if (unlikely(pmd_large(pmd))) {
+ /*
+ * NUMA hinting faults need to be handled in the GUP
+ * slowpath for accounting purposes and so that they
+ * can be serialised against THP migration.
+ */
+ if (pmd_protnone(pmd))
+ return 0;
if (!gup_huge_pmd(pmdp, pmd, addr, next,
write, pages, nr))
return 0;
diff --git a/kernel/arch/s390/mm/hugetlbpage.c b/kernel/arch/s390/mm/hugetlbpage.c
index e617e74b7..f81096b69 100644
--- a/kernel/arch/s390/mm/hugetlbpage.c
+++ b/kernel/arch/s390/mm/hugetlbpage.c
@@ -40,6 +40,7 @@ static inline pmd_t __pte_to_pmd(pte_t pte)
pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT);
pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10;
pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13;
} else
pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
return pmd;
@@ -78,6 +79,7 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT);
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10;
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13;
} else
pte_val(pte) = _PAGE_INVALID;
return pte;
@@ -86,31 +88,16 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
- pmd_t pmd;
+ pmd_t pmd = __pte_to_pmd(pte);
- pmd = __pte_to_pmd(pte);
- if (!MACHINE_HAS_HPAGE) {
- /* Emulated huge ptes loose the dirty and young bit */
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
- pmd_val(pmd) |= pte_page(pte)[1].index;
- } else
- pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
*(pmd_t *) ptep = pmd;
}
pte_t huge_ptep_get(pte_t *ptep)
{
- unsigned long origin;
- pmd_t pmd;
+ pmd_t pmd = *(pmd_t *) ptep;
- pmd = *(pmd_t *) ptep;
- if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) {
- origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN;
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
- pmd_val(pmd) |= *(unsigned long *) origin;
- /* Emulated huge ptes are young and dirty by definition */
- pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG | _SEGMENT_ENTRY_DIRTY;
- }
return __pmd_to_pte(pmd);
}
@@ -125,45 +112,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
return pte;
}
-int arch_prepare_hugepage(struct page *page)
-{
- unsigned long addr = page_to_phys(page);
- pte_t pte;
- pte_t *ptep;
- int i;
-
- if (MACHINE_HAS_HPAGE)
- return 0;
-
- ptep = (pte_t *) pte_alloc_one(&init_mm, addr);
- if (!ptep)
- return -ENOMEM;
-
- pte_val(pte) = addr;
- for (i = 0; i < PTRS_PER_PTE; i++) {
- set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte);
- pte_val(pte) += PAGE_SIZE;
- }
- page[1].index = (unsigned long) ptep;
- return 0;
-}
-
-void arch_release_hugepage(struct page *page)
-{
- pte_t *ptep;
-
- if (MACHINE_HAS_HPAGE)
- return;
-
- ptep = (pte_t *) page[1].index;
- if (!ptep)
- return;
- clear_table((unsigned long *) ptep, _PAGE_INVALID,
- PTRS_PER_PTE * sizeof(pte_t));
- page_table_free(&init_mm, (unsigned long *) ptep);
- page[1].index = 0;
-}
-
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
@@ -193,17 +141,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
return (pte_t *) pmdp;
}
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
-{
- return 0;
-}
-
int pmd_huge(pmd_t pmd)
{
- if (!MACHINE_HAS_HPAGE)
- return 0;
-
- return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
+ return pmd_large(pmd);
}
int pud_huge(pud_t pud)
diff --git a/kernel/arch/s390/mm/init.c b/kernel/arch/s390/mm/init.c
index 80875c43a..c722400c7 100644
--- a/kernel/arch/s390/mm/init.c
+++ b/kernel/arch/s390/mm/init.c
@@ -27,6 +27,7 @@
#include <linux/initrd.h>
#include <linux/export.h>
#include <linux/gfp.h>
+#include <linux/memblock.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -47,37 +48,13 @@ EXPORT_SYMBOL(zero_page_mask);
static void __init setup_zero_pages(void)
{
- struct cpuid cpu_id;
unsigned int order;
struct page *page;
int i;
- get_cpu_id(&cpu_id);
- switch (cpu_id.machine) {
- case 0x9672: /* g5 */
- case 0x2064: /* z900 */
- case 0x2066: /* z900 */
- case 0x2084: /* z990 */
- case 0x2086: /* z990 */
- case 0x2094: /* z9-109 */
- case 0x2096: /* z9-109 */
- order = 0;
- break;
- case 0x2097: /* z10 */
- case 0x2098: /* z10 */
- case 0x2817: /* z196 */
- case 0x2818: /* z196 */
- order = 2;
- break;
- case 0x2827: /* zEC12 */
- case 0x2828: /* zEC12 */
- order = 5;
- break;
- case 0x2964: /* z13 */
- default:
- order = 7;
- break;
- }
+ /* Latest machines require a mapping granularity of 512KB */
+ order = 7;
+
/* Limit number of empty zero pages for small memory sizes */
while (order > 2 && (totalram_pages >> 10) < (1UL << order))
order--;
@@ -138,7 +115,7 @@ void __init mem_init(void)
cpumask_set_cpu(0, mm_cpumask(&init_mm));
atomic_set(&init_mm.context.attach_count, 1);
- max_mapnr = max_low_pfn;
+ set_max_mapnr(max_low_pfn);
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
/* Setup guest page hinting */
@@ -168,39 +145,38 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
{
- unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
+ unsigned long normal_end_pfn = PFN_DOWN(memblock_end_of_DRAM());
+ unsigned long dma_end_pfn = PFN_DOWN(MAX_DMA_ADDRESS);
unsigned long start_pfn = PFN_DOWN(start);
unsigned long size_pages = PFN_DOWN(size);
- struct zone *zone;
- int rc;
+ unsigned long nr_pages;
+ int rc, zone_enum;
rc = vmem_add_mapping(start, size);
if (rc)
return rc;
- for_each_zone(zone) {
- if (zone_idx(zone) != ZONE_MOVABLE) {
- /* Add range within existing zone limits */
- zone_start_pfn = zone->zone_start_pfn;
- zone_end_pfn = zone->zone_start_pfn +
- zone->spanned_pages;
+
+ while (size_pages > 0) {
+ if (start_pfn < dma_end_pfn) {
+ nr_pages = (start_pfn + size_pages > dma_end_pfn) ?
+ dma_end_pfn - start_pfn : size_pages;
+ zone_enum = ZONE_DMA;
+ } else if (start_pfn < normal_end_pfn) {
+ nr_pages = (start_pfn + size_pages > normal_end_pfn) ?
+ normal_end_pfn - start_pfn : size_pages;
+ zone_enum = ZONE_NORMAL;
} else {
- /* Add remaining range to ZONE_MOVABLE */
- zone_start_pfn = start_pfn;
- zone_end_pfn = start_pfn + size_pages;
+ nr_pages = size_pages;
+ zone_enum = ZONE_MOVABLE;
}
- if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
- continue;
- nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
- zone_end_pfn - start_pfn : size_pages;
- rc = __add_pages(nid, zone, start_pfn, nr_pages);
+ rc = __add_pages(nid, NODE_DATA(nid)->node_zones + zone_enum,
+ start_pfn, size_pages);
if (rc)
break;
start_pfn += nr_pages;
size_pages -= nr_pages;
- if (!size_pages)
- break;
}
if (rc)
vmem_remove_mapping(start, size);
@@ -213,7 +189,7 @@ unsigned long memory_block_size_bytes(void)
* Make sure the memory block size is always greater
* or equal than the memory increment size.
*/
- return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp_get_rzm());
+ return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp.rzm);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
diff --git a/kernel/arch/s390/mm/mem_detect.c b/kernel/arch/s390/mm/mem_detect.c
index 0f3604395..e00f0d5d2 100644
--- a/kernel/arch/s390/mm/mem_detect.c
+++ b/kernel/arch/s390/mm/mem_detect.c
@@ -31,8 +31,8 @@ void __init detect_memory_memblock(void)
unsigned long addr, size;
int type;
- rzm = sclp_get_rzm();
- rnmax = sclp_get_rnmax();
+ rzm = sclp.rzm;
+ rnmax = sclp.rnmax;
memsize = rzm * rnmax;
if (!rzm)
rzm = 1ULL << 17;
diff --git a/kernel/arch/s390/mm/mmap.c b/kernel/arch/s390/mm/mmap.c
index 6e552af08..ea01477b4 100644
--- a/kernel/arch/s390/mm/mmap.c
+++ b/kernel/arch/s390/mm/mmap.c
@@ -31,9 +31,6 @@
#include <linux/security.h>
#include <asm/pgalloc.h>
-unsigned long mmap_rnd_mask;
-static unsigned long mmap_align_mask;
-
static unsigned long stack_maxrandom_size(void)
{
if (!(current->flags & PF_RANDOMIZE))
@@ -62,10 +59,7 @@ static inline int mmap_is_legacy(void)
unsigned long arch_mmap_rnd(void)
{
- if (is_32bit_task())
- return (get_random_int() & 0x7ff) << PAGE_SHIFT;
- else
- return (get_random_int() & mmap_rnd_mask) << PAGE_SHIFT;
+ return (get_random_int() & MMAP_RND_MASK) << PAGE_SHIFT;
}
static unsigned long mmap_base_legacy(unsigned long rnd)
@@ -92,7 +86,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct vm_unmapped_area_info info;
- int do_color_align;
if (len > TASK_SIZE - mmap_min_addr)
return -ENOMEM;
@@ -108,15 +101,14 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
return addr;
}
- do_color_align = 0;
- if (filp || (flags & MAP_SHARED))
- do_color_align = !is_32bit_task();
-
info.flags = 0;
info.length = len;
info.low_limit = mm->mmap_base;
info.high_limit = TASK_SIZE;
- info.align_mask = do_color_align ? (mmap_align_mask << PAGE_SHIFT) : 0;
+ if (filp || (flags & MAP_SHARED))
+ info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
+ else
+ info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
return vm_unmapped_area(&info);
}
@@ -130,7 +122,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
struct vm_unmapped_area_info info;
- int do_color_align;
/* requested length too big for entire address space */
if (len > TASK_SIZE - mmap_min_addr)
@@ -148,15 +139,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
return addr;
}
- do_color_align = 0;
- if (filp || (flags & MAP_SHARED))
- do_color_align = !is_32bit_task();
-
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = max(PAGE_SIZE, mmap_min_addr);
info.high_limit = mm->mmap_base;
- info.align_mask = do_color_align ? (mmap_align_mask << PAGE_SHIFT) : 0;
+ if (filp || (flags & MAP_SHARED))
+ info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
+ else
+ info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
addr = vm_unmapped_area(&info);
@@ -254,35 +244,3 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
mm->get_unmapped_area = s390_get_unmapped_area_topdown;
}
}
-
-static int __init setup_mmap_rnd(void)
-{
- struct cpuid cpu_id;
-
- get_cpu_id(&cpu_id);
- switch (cpu_id.machine) {
- case 0x9672:
- case 0x2064:
- case 0x2066:
- case 0x2084:
- case 0x2086:
- case 0x2094:
- case 0x2096:
- case 0x2097:
- case 0x2098:
- case 0x2817:
- case 0x2818:
- case 0x2827:
- case 0x2828:
- mmap_rnd_mask = 0x7ffUL;
- mmap_align_mask = 0UL;
- break;
- case 0x2964: /* z13 */
- default:
- mmap_rnd_mask = 0x3ff80UL;
- mmap_align_mask = 0x7fUL;
- break;
- }
- return 0;
-}
-early_initcall(setup_mmap_rnd);
diff --git a/kernel/arch/s390/mm/pgtable.c b/kernel/arch/s390/mm/pgtable.c
index b33f66110..54ef3bc01 100644
--- a/kernel/arch/s390/mm/pgtable.c
+++ b/kernel/arch/s390/mm/pgtable.c
@@ -10,11 +10,7 @@
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/quicklist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
@@ -28,12 +24,9 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
-#define ALLOC_ORDER 2
-#define FRAG_MASK 0x03
-
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
- struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+ struct page *page = alloc_pages(GFP_KERNEL, 2);
if (!page)
return NULL;
@@ -42,7 +35,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
- free_pages((unsigned long) table, ALLOC_ORDER);
+ free_pages((unsigned long) table, 2);
}
static void __crst_table_upgrade(void *arg)
@@ -176,7 +169,7 @@ struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
spin_lock_init(&gmap->guest_table_lock);
gmap->mm = mm;
- page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+ page = alloc_pages(GFP_KERNEL, 2);
if (!page)
goto out_free;
page->index = 0;
@@ -247,7 +240,7 @@ void gmap_free(struct gmap *gmap)
/* Free all segment & region tables. */
list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
- __free_pages(page, ALLOC_ORDER);
+ __free_pages(page, 2);
gmap_radix_tree_free(&gmap->guest_to_host);
gmap_radix_tree_free(&gmap->host_to_guest);
down_write(&gmap->mm->mmap_sem);
@@ -287,7 +280,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
unsigned long *new;
/* since we dont free the gmap table until gmap_free we can unlock */
- page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+ page = alloc_pages(GFP_KERNEL, 2);
if (!page)
return -ENOMEM;
new = (unsigned long *) page_to_phys(page);
@@ -302,7 +295,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
}
spin_unlock(&gmap->mm->page_table_lock);
if (page)
- __free_pages(page, ALLOC_ORDER);
+ __free_pages(page, 2);
return 0;
}
@@ -795,40 +788,6 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
}
EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
-static inline int page_table_with_pgste(struct page *page)
-{
- return atomic_read(&page->_mapcount) == 0;
-}
-
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
-{
- struct page *page;
- unsigned long *table;
-
- page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
- if (!page)
- return NULL;
- if (!pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
- }
- atomic_set(&page->_mapcount, 0);
- table = (unsigned long *) page_to_phys(page);
- clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
- clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
- return table;
-}
-
-static inline void page_table_free_pgste(unsigned long *table)
-{
- struct page *page;
-
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
- __free_page(page);
-}
-
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned long key, bool nq)
{
@@ -957,20 +916,6 @@ __initcall(page_table_register_sysctl);
#else /* CONFIG_PGSTE */
-static inline int page_table_with_pgste(struct page *page)
-{
- return 0;
-}
-
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
-{
- return NULL;
-}
-
-static inline void page_table_free_pgste(unsigned long *table)
-{
-}
-
static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
unsigned long vmaddr)
{
@@ -994,44 +939,55 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
*/
unsigned long *page_table_alloc(struct mm_struct *mm)
{
- unsigned long *uninitialized_var(table);
- struct page *uninitialized_var(page);
+ unsigned long *table;
+ struct page *page;
unsigned int mask, bit;
- if (mm_alloc_pgste(mm))
- return page_table_alloc_pgste(mm);
- /* Allocate fragments of a 4K page as 1K/2K page table */
- spin_lock_bh(&mm->context.list_lock);
- mask = FRAG_MASK;
- if (!list_empty(&mm->context.pgtable_list)) {
- page = list_first_entry(&mm->context.pgtable_list,
- struct page, lru);
- table = (unsigned long *) page_to_phys(page);
- mask = atomic_read(&page->_mapcount);
- mask = mask | (mask >> 4);
- }
- if ((mask & FRAG_MASK) == FRAG_MASK) {
- spin_unlock_bh(&mm->context.list_lock);
- page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
- if (!page)
- return NULL;
- if (!pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
+ /* Try to get a fragment of a 4K page as a 2K page table */
+ if (!mm_alloc_pgste(mm)) {
+ table = NULL;
+ spin_lock_bh(&mm->context.list_lock);
+ if (!list_empty(&mm->context.pgtable_list)) {
+ page = list_first_entry(&mm->context.pgtable_list,
+ struct page, lru);
+ mask = atomic_read(&page->_mapcount);
+ mask = (mask | (mask >> 4)) & 3;
+ if (mask != 3) {
+ table = (unsigned long *) page_to_phys(page);
+ bit = mask & 1; /* =1 -> second 2K */
+ if (bit)
+ table += PTRS_PER_PTE;
+ atomic_xor_bits(&page->_mapcount, 1U << bit);
+ list_del(&page->lru);
+ }
}
+ spin_unlock_bh(&mm->context.list_lock);
+ if (table)
+ return table;
+ }
+ /* Allocate a fresh page */
+ page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+ if (!page)
+ return NULL;
+ if (!pgtable_page_ctor(page)) {
+ __free_page(page);
+ return NULL;
+ }
+ /* Initialize page table */
+ table = (unsigned long *) page_to_phys(page);
+ if (mm_alloc_pgste(mm)) {
+ /* Return 4K page table with PGSTEs */
+ atomic_set(&page->_mapcount, 3);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+ clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+ } else {
+ /* Return the first 2K fragment of the page */
atomic_set(&page->_mapcount, 1);
- table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_INVALID, PAGE_SIZE);
spin_lock_bh(&mm->context.list_lock);
list_add(&page->lru, &mm->context.pgtable_list);
- } else {
- for (bit = 1; mask & bit; bit <<= 1)
- table += PTRS_PER_PTE;
- mask = atomic_xor_bits(&page->_mapcount, bit);
- if ((mask & FRAG_MASK) == FRAG_MASK)
- list_del(&page->lru);
+ spin_unlock_bh(&mm->context.list_lock);
}
- spin_unlock_bh(&mm->context.list_lock);
return table;
}
@@ -1041,37 +997,23 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
unsigned int bit, mask;
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (page_table_with_pgste(page))
- return page_table_free_pgste(table);
- /* Free 1K/2K page table fragment of a 4K page */
- bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
- spin_lock_bh(&mm->context.list_lock);
- if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
- list_del(&page->lru);
- mask = atomic_xor_bits(&page->_mapcount, bit);
- if (mask & FRAG_MASK)
- list_add(&page->lru, &mm->context.pgtable_list);
- spin_unlock_bh(&mm->context.list_lock);
- if (mask == 0) {
- pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
- __free_page(page);
+ if (!mm_alloc_pgste(mm)) {
+ /* Free 2K page table fragment of a 4K page */
+ bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+ spin_lock_bh(&mm->context.list_lock);
+ mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
+ if (mask & 3)
+ list_add(&page->lru, &mm->context.pgtable_list);
+ else
+ list_del(&page->lru);
+ spin_unlock_bh(&mm->context.list_lock);
+ if (mask != 0)
+ return;
}
-}
-
-static void __page_table_free_rcu(void *table, unsigned bit)
-{
- struct page *page;
- if (bit == FRAG_MASK)
- return page_table_free_pgste(table);
- /* Free 1K/2K page table fragment of a 4K page */
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
- pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
- __free_page(page);
- }
+ pgtable_page_dtor(page);
+ atomic_set(&page->_mapcount, -1);
+ __free_page(page);
}
void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
@@ -1083,34 +1025,45 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
mm = tlb->mm;
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (page_table_with_pgste(page)) {
+ if (mm_alloc_pgste(mm)) {
gmap_unlink(mm, table, vmaddr);
- table = (unsigned long *) (__pa(table) | FRAG_MASK);
+ table = (unsigned long *) (__pa(table) | 3);
tlb_remove_table(tlb, table);
return;
}
- bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
+ bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
spin_lock_bh(&mm->context.list_lock);
- if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
- list_del(&page->lru);
- mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
- if (mask & FRAG_MASK)
+ mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
+ if (mask & 3)
list_add_tail(&page->lru, &mm->context.pgtable_list);
+ else
+ list_del(&page->lru);
spin_unlock_bh(&mm->context.list_lock);
- table = (unsigned long *) (__pa(table) | (bit << 4));
+ table = (unsigned long *) (__pa(table) | (1U << bit));
tlb_remove_table(tlb, table);
}
static void __tlb_remove_table(void *_table)
{
- const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
- void *table = (void *)((unsigned long) _table & ~mask);
- unsigned type = (unsigned long) _table & mask;
-
- if (type)
- __page_table_free_rcu(table, type);
- else
- free_pages((unsigned long) table, ALLOC_ORDER);
+ unsigned int mask = (unsigned long) _table & 3;
+ void *table = (void *)((unsigned long) _table ^ mask);
+ struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+
+ switch (mask) {
+ case 0: /* pmd or pud */
+ free_pages((unsigned long) table, 2);
+ break;
+ case 1: /* lower 2K of a 4K page table */
+ case 2: /* higher 2K of a 4K page table */
+ if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
+ break;
+ /* fallthrough */
+ case 3: /* 4K page table with pgstes */
+ pgtable_page_dtor(page);
+ atomic_set(&page->_mapcount, -1);
+ __free_page(page);
+ break;
+ }
}
static void tlb_remove_table_smp_sync(void *arg)
diff --git a/kernel/arch/s390/net/bpf_jit.h b/kernel/arch/s390/net/bpf_jit.h
index de156ba3b..f010c93a8 100644
--- a/kernel/arch/s390/net/bpf_jit.h
+++ b/kernel/arch/s390/net/bpf_jit.h
@@ -28,11 +28,16 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
* | old backchain | |
* +---------------+ |
* | r15 - r6 | |
+ * +---------------+ |
+ * | 4 byte align | |
+ * | tail_call_cnt | |
* BFP -> +===============+ |
* | | |
* | BPF stack | |
* | | |
* +---------------+ |
+ * | 8 byte skbp | |
+ * R15+170 -> +---------------+ |
* | 8 byte hlen | |
* R15+168 -> +---------------+ |
* | 4 byte align | |
@@ -46,13 +51,17 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
* R15 -> +---------------+ + low
*
* We get 160 bytes stack space from calling function, but only use
- * 11 * 8 byte (old backchain + r15 - r6) for storing registers.
+ * 12 * 8 byte for old backchain, r15..r6, and tail_call_cnt.
*/
-#define STK_SPACE (MAX_BPF_STACK + 8 + 4 + 4 + 160)
-#define STK_160_UNUSED (160 - 11 * 8)
+#define STK_SPACE (MAX_BPF_STACK + 8 + 8 + 4 + 4 + 160)
+#define STK_160_UNUSED (160 - 12 * 8)
#define STK_OFF (STK_SPACE - STK_160_UNUSED)
#define STK_OFF_TMP 160 /* Offset of tmp buffer on stack */
#define STK_OFF_HLEN 168 /* Offset of SKB header length on stack */
+#define STK_OFF_SKBP 170 /* Offset of SKB pointer on stack */
+
+#define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */
+#define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */
/* Offset to skip condition code check */
#define OFF_OK 4
diff --git a/kernel/arch/s390/net/bpf_jit_comp.c b/kernel/arch/s390/net/bpf_jit_comp.c
index dc2d7aa56..9a0c4c22e 100644
--- a/kernel/arch/s390/net/bpf_jit_comp.c
+++ b/kernel/arch/s390/net/bpf_jit_comp.c
@@ -21,6 +21,7 @@
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/init.h>
+#include <linux/bpf.h>
#include <asm/cacheflush.h>
#include <asm/dis.h>
#include "bpf_jit.h"
@@ -40,15 +41,19 @@ struct bpf_jit {
int base_ip; /* Base address for literal pool */
int ret0_ip; /* Address of return 0 */
int exit_ip; /* Address of exit */
+ int tail_call_start; /* Tail call start offset */
+ int labels[1]; /* Labels for local jumps */
};
-#define BPF_SIZE_MAX 4096 /* Max size for program */
+#define BPF_SIZE_MAX 0x7ffff /* Max size for program (20 bit signed displ) */
#define SEEN_SKB 1 /* skb access */
#define SEEN_MEM 2 /* use mem[] for temporary storage */
#define SEEN_RET0 4 /* ret0_ip points to a valid return 0 */
#define SEEN_LITERAL 8 /* code uses literals */
#define SEEN_FUNC 16 /* calls C functions */
+#define SEEN_TAIL_CALL 32 /* code uses tail calls */
+#define SEEN_SKB_CHANGE 64 /* code changes skb data */
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
/*
@@ -60,6 +65,7 @@ struct bpf_jit {
#define REG_L (__MAX_BPF_REG+3) /* Literal pool register */
#define REG_15 (__MAX_BPF_REG+4) /* Register 15 */
#define REG_0 REG_W0 /* Register 0 */
+#define REG_1 REG_W1 /* Register 1 */
#define REG_2 BPF_REG_1 /* Register 2 */
#define REG_14 BPF_REG_0 /* Register 14 */
@@ -198,19 +204,11 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
_EMIT6(op1 | __disp, op2); \
})
-#define EMIT6_DISP(op1, op2, b1, b2, b3, disp) \
-({ \
- _EMIT6_DISP(op1 | reg(b1, b2) << 16 | \
- reg_high(b3) << 8, op2, disp); \
- REG_SET_SEEN(b1); \
- REG_SET_SEEN(b2); \
- REG_SET_SEEN(b3); \
-})
-
#define _EMIT6_DISP_LH(op1, op2, disp) \
({ \
- unsigned int __disp_h = ((u32)disp) & 0xff000; \
- unsigned int __disp_l = ((u32)disp) & 0x00fff; \
+ u32 _disp = (u32) disp; \
+ unsigned int __disp_h = _disp & 0xff000; \
+ unsigned int __disp_l = _disp & 0x00fff; \
_EMIT6(op1 | __disp_l, op2 | __disp_h >> 4); \
})
@@ -223,6 +221,24 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
REG_SET_SEEN(b3); \
})
+#define EMIT6_PCREL_LABEL(op1, op2, b1, b2, label, mask) \
+({ \
+ int rel = (jit->labels[label] - jit->prg) >> 1; \
+ _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), \
+ op2 | mask << 12); \
+ REG_SET_SEEN(b1); \
+ REG_SET_SEEN(b2); \
+})
+
+#define EMIT6_PCREL_IMM_LABEL(op1, op2, b1, imm, label, mask) \
+({ \
+ int rel = (jit->labels[label] - jit->prg) >> 1; \
+ _EMIT6(op1 | (reg_high(b1) | mask) << 16 | \
+ (rel & 0xffff), op2 | (imm & 0xff) << 8); \
+ REG_SET_SEEN(b1); \
+ BUILD_BUG_ON(((unsigned long) imm) > 0xff); \
+})
+
#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \
({ \
/* Branch instruction needs 6 bytes */ \
@@ -286,7 +302,7 @@ static void jit_fill_hole(void *area, unsigned int size)
*/
static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
{
- u32 off = 72 + (rs - 6) * 8;
+ u32 off = STK_OFF_R6 + (rs - 6) * 8;
if (rs == re)
/* stg %rs,off(%r15) */
@@ -301,7 +317,7 @@ static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
*/
static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re)
{
- u32 off = 72 + (rs - 6) * 8;
+ u32 off = STK_OFF_R6 + (rs - 6) * 8;
if (jit->seen & SEEN_STACK)
off += STK_OFF;
@@ -367,13 +383,43 @@ static void save_restore_regs(struct bpf_jit *jit, int op)
}
/*
+ * For SKB access %b1 contains the SKB pointer. For "bpf_jit.S"
+ * we store the SKB header length on the stack and the SKB data
+ * pointer in REG_SKB_DATA.
+ */
+static void emit_load_skb_data_hlen(struct bpf_jit *jit)
+{
+ /* Header length: llgf %w1,<len>(%b1) */
+ EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_1,
+ offsetof(struct sk_buff, len));
+ /* s %w1,<data_len>(%b1) */
+ EMIT4_DISP(0x5b000000, REG_W1, BPF_REG_1,
+ offsetof(struct sk_buff, data_len));
+ /* stg %w1,ST_OFF_HLEN(%r0,%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_HLEN);
+ /* lg %skb_data,data_off(%b1) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
+ BPF_REG_1, offsetof(struct sk_buff, data));
+}
+
+/*
* Emit function prologue
*
* Save registers and create stack frame if necessary.
* See stack frame layout desription in "bpf_jit.h"!
*/
-static void bpf_jit_prologue(struct bpf_jit *jit)
+static void bpf_jit_prologue(struct bpf_jit *jit, bool is_classic)
{
+ if (jit->seen & SEEN_TAIL_CALL) {
+ /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
+ _EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
+ } else {
+ /* j tail_call_start: NOP if no tail calls are used */
+ EMIT4_PCREL(0xa7f40000, 6);
+ _EMIT2(0);
+ }
+ /* Tail calls have to skip above initialization */
+ jit->tail_call_start = jit->prg;
/* Save registers */
save_restore_regs(jit, REGS_SAVE);
/* Setup literal pool */
@@ -396,32 +442,21 @@ static void bpf_jit_prologue(struct bpf_jit *jit)
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
REG_15, 152);
}
- /*
- * For SKB access %b1 contains the SKB pointer. For "bpf_jit.S"
- * we store the SKB header length on the stack and the SKB data
- * pointer in REG_SKB_DATA.
- */
- if (jit->seen & SEEN_SKB) {
- /* Header length: llgf %w1,<len>(%b1) */
- EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_1,
- offsetof(struct sk_buff, len));
- /* s %w1,<data_len>(%b1) */
- EMIT4_DISP(0x5b000000, REG_W1, BPF_REG_1,
- offsetof(struct sk_buff, data_len));
- /* stg %w1,ST_OFF_HLEN(%r0,%r15) */
+ if (jit->seen & SEEN_SKB)
+ emit_load_skb_data_hlen(jit);
+ if (jit->seen & SEEN_SKB_CHANGE)
+ /* stg %b1,ST_OFF_SKBP(%r0,%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15,
- STK_OFF_HLEN);
- /* lg %skb_data,data_off(%b1) */
- EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
- BPF_REG_1, offsetof(struct sk_buff, data));
+ STK_OFF_SKBP);
+ /* Clear A (%b0) and X (%b7) registers for converted BPF programs */
+ if (is_classic) {
+ if (REG_SEEN(BPF_REG_A))
+ /* lghi %ba,0 */
+ EMIT4_IMM(0xa7090000, BPF_REG_A, 0);
+ if (REG_SEEN(BPF_REG_X))
+ /* lghi %bx,0 */
+ EMIT4_IMM(0xa7090000, BPF_REG_X, 0);
}
- /* BPF compatibility: clear A (%b0) and X (%b7) registers */
- if (REG_SEEN(BPF_REG_A))
- /* lghi %ba,0 */
- EMIT4_IMM(0xa7090000, BPF_REG_A, 0);
- if (REG_SEEN(BPF_REG_X))
- /* lghi %bx,0 */
- EMIT4_IMM(0xa7090000, BPF_REG_X, 0);
}
/*
@@ -943,14 +978,90 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
REG_SET_SEEN(BPF_REG_5);
jit->seen |= SEEN_FUNC;
/* lg %w1,<d(imm)>(%l) */
- EMIT6_DISP(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
- EMIT_CONST_U64(func));
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
+ EMIT_CONST_U64(func));
/* basr %r14,%w1 */
EMIT2(0x0d00, REG_14, REG_W1);
/* lgr %b0,%r2: load return value into %b0 */
EMIT4(0xb9040000, BPF_REG_0, REG_2);
+ if (bpf_helper_changes_skb_data((void *)func)) {
+ jit->seen |= SEEN_SKB_CHANGE;
+ /* lg %b1,ST_OFF_SKBP(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
+ REG_15, STK_OFF_SKBP);
+ emit_load_skb_data_hlen(jit);
+ }
break;
}
+ case BPF_JMP | BPF_CALL | BPF_X:
+ /*
+ * Implicit input:
+ * B1: pointer to ctx
+ * B2: pointer to bpf_array
+ * B3: index in bpf_array
+ */
+ jit->seen |= SEEN_TAIL_CALL;
+
+ /*
+ * if (index >= array->map.max_entries)
+ * goto out;
+ */
+
+ /* llgf %w1,map.max_entries(%b2) */
+ EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
+ offsetof(struct bpf_array, map.max_entries));
+ /* clgrj %b3,%w1,0xa,label0: if %b3 >= %w1 goto out */
+ EMIT6_PCREL_LABEL(0xec000000, 0x0065, BPF_REG_3,
+ REG_W1, 0, 0xa);
+
+ /*
+ * if (tail_call_cnt++ > MAX_TAIL_CALL_CNT)
+ * goto out;
+ */
+
+ if (jit->seen & SEEN_STACK)
+ off = STK_OFF_TCCNT + STK_OFF;
+ else
+ off = STK_OFF_TCCNT;
+ /* lhi %w0,1 */
+ EMIT4_IMM(0xa7080000, REG_W0, 1);
+ /* laal %w1,%w0,off(%r15) */
+ EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
+ /* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
+ EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
+ MAX_TAIL_CALL_CNT, 0, 0x2);
+
+ /*
+ * prog = array->ptrs[index];
+ * if (prog == NULL)
+ * goto out;
+ */
+
+ /* sllg %r1,%b3,3: %r1 = index * 8 */
+ EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, BPF_REG_3, REG_0, 3);
+ /* lg %r1,prog(%b2,%r1) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, BPF_REG_2,
+ REG_1, offsetof(struct bpf_array, ptrs));
+ /* clgij %r1,0,0x8,label0 */
+ EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007d, REG_1, 0, 0, 0x8);
+
+ /*
+ * Restore registers before calling function
+ */
+ save_restore_regs(jit, REGS_RESTORE);
+
+ /*
+ * goto *(prog->bpf_func + tail_call_start);
+ */
+
+ /* lg %r1,bpf_func(%r1) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_1, REG_0,
+ offsetof(struct bpf_prog, bpf_func));
+ /* bc 0xf,tail_call_start(%r1) */
+ _EMIT4(0x47f01000 + jit->tail_call_start);
+ /* out: */
+ jit->labels[0] = jit->prg;
+ break;
case BPF_JMP | BPF_EXIT: /* return b0 */
last = (i == fp->len - 1) ? 1 : 0;
if (last && !(jit->seen & SEEN_RET0))
@@ -1134,7 +1245,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
jit->lit = jit->lit_start;
jit->prg = 0;
- bpf_jit_prologue(jit);
+ bpf_jit_prologue(jit, bpf_prog_was_classic(fp));
for (i = 0; i < fp->len; i += insn_count) {
insn_count = bpf_jit_insn(jit, fp, i);
if (insn_count < 0)
@@ -1199,7 +1310,7 @@ void bpf_int_jit_compile(struct bpf_prog *fp)
if (jit.prg_buf) {
set_memory_ro((unsigned long)header, header->pages);
fp->bpf_func = (void *) jit.prg_buf;
- fp->jited = true;
+ fp->jited = 1;
}
free_addrs:
kfree(jit.addrs);
diff --git a/kernel/arch/s390/numa/Makefile b/kernel/arch/s390/numa/Makefile
new file mode 100644
index 000000000..f94ecaffa
--- /dev/null
+++ b/kernel/arch/s390/numa/Makefile
@@ -0,0 +1,3 @@
+obj-y += numa.o
+obj-y += toptree.o
+obj-$(CONFIG_NUMA_EMU) += mode_emu.o
diff --git a/kernel/arch/s390/numa/mode_emu.c b/kernel/arch/s390/numa/mode_emu.c
new file mode 100644
index 000000000..828d0695d
--- /dev/null
+++ b/kernel/arch/s390/numa/mode_emu.c
@@ -0,0 +1,536 @@
+/*
+ * NUMA support for s390
+ *
+ * NUMA emulation (aka fake NUMA) distributes the available memory to nodes
+ * without using real topology information about the physical memory of the
+ * machine.
+ *
+ * It distributes the available CPUs to nodes while respecting the original
+ * machine topology information. This is done by trying to avoid to separate
+ * CPUs which reside on the same book or even on the same MC.
+ *
+ * Because the current Linux scheduler code requires a stable cpu to node
+ * mapping, cores are pinned to nodes when the first CPU thread is set online.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#define KMSG_COMPONENT "numa_emu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+#include <linux/memblock.h>
+#include <linux/node.h>
+#include <linux/memory.h>
+#include <linux/slab.h>
+#include <asm/smp.h>
+#include <asm/topology.h>
+#include "numa_mode.h"
+#include "toptree.h"
+
+/* Distances between the different system components */
+#define DIST_EMPTY 0
+#define DIST_CORE 1
+#define DIST_MC 2
+#define DIST_BOOK 3
+#define DIST_MAX 4
+
+/* Node distance reported to common code */
+#define EMU_NODE_DIST 10
+
+/* Node ID for free (not yet pinned) cores */
+#define NODE_ID_FREE -1
+
+/* Different levels of toptree */
+enum toptree_level {CORE, MC, BOOK, NODE, TOPOLOGY};
+
+/* The two toptree IDs */
+enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA};
+
+/* Number of NUMA nodes */
+static int emu_nodes = 1;
+/* NUMA stripe size */
+static unsigned long emu_size;
+
+/*
+ * Node to core pinning information updates are protected by
+ * "sched_domains_mutex".
+ */
+static struct {
+ s32 to_node_id[CONFIG_NR_CPUS]; /* Pinned core to node mapping */
+ int total; /* Total number of pinned cores */
+ int per_node_target; /* Cores per node without extra cores */
+ int per_node[MAX_NUMNODES]; /* Number of cores pinned to node */
+} *emu_cores;
+
+/*
+ * Pin a core to a node
+ */
+static void pin_core_to_node(int core_id, int node_id)
+{
+ if (emu_cores->to_node_id[core_id] == NODE_ID_FREE) {
+ emu_cores->per_node[node_id]++;
+ emu_cores->to_node_id[core_id] = node_id;
+ emu_cores->total++;
+ } else {
+ WARN_ON(emu_cores->to_node_id[core_id] != node_id);
+ }
+}
+
+/*
+ * Number of pinned cores of a node
+ */
+static int cores_pinned(struct toptree *node)
+{
+ return emu_cores->per_node[node->id];
+}
+
+/*
+ * ID of the node where the core is pinned (or NODE_ID_FREE)
+ */
+static int core_pinned_to_node_id(struct toptree *core)
+{
+ return emu_cores->to_node_id[core->id];
+}
+
+/*
+ * Number of cores in the tree that are not yet pinned
+ */
+static int cores_free(struct toptree *tree)
+{
+ struct toptree *core;
+ int count = 0;
+
+ toptree_for_each(core, tree, CORE) {
+ if (core_pinned_to_node_id(core) == NODE_ID_FREE)
+ count++;
+ }
+ return count;
+}
+
+/*
+ * Return node of core
+ */
+static struct toptree *core_node(struct toptree *core)
+{
+ return core->parent->parent->parent;
+}
+
+/*
+ * Return book of core
+ */
+static struct toptree *core_book(struct toptree *core)
+{
+ return core->parent->parent;
+}
+
+/*
+ * Return mc of core
+ */
+static struct toptree *core_mc(struct toptree *core)
+{
+ return core->parent;
+}
+
+/*
+ * Distance between two cores
+ */
+static int dist_core_to_core(struct toptree *core1, struct toptree *core2)
+{
+ if (core_book(core1)->id != core_book(core2)->id)
+ return DIST_BOOK;
+ if (core_mc(core1)->id != core_mc(core2)->id)
+ return DIST_MC;
+ /* Same core or sibling on same MC */
+ return DIST_CORE;
+}
+
+/*
+ * Distance of a node to a core
+ */
+static int dist_node_to_core(struct toptree *node, struct toptree *core)
+{
+ struct toptree *core_node;
+ int dist_min = DIST_MAX;
+
+ toptree_for_each(core_node, node, CORE)
+ dist_min = min(dist_min, dist_core_to_core(core_node, core));
+ return dist_min == DIST_MAX ? DIST_EMPTY : dist_min;
+}
+
+/*
+ * Unify will delete empty nodes, therefore recreate nodes.
+ */
+static void toptree_unify_tree(struct toptree *tree)
+{
+ int nid;
+
+ toptree_unify(tree);
+ for (nid = 0; nid < emu_nodes; nid++)
+ toptree_get_child(tree, nid);
+}
+
+/*
+ * Find the best/nearest node for a given core and ensure that no node
+ * gets more than "emu_cores->per_node_target + extra" cores.
+ */
+static struct toptree *node_for_core(struct toptree *numa, struct toptree *core,
+ int extra)
+{
+ struct toptree *node, *node_best = NULL;
+ int dist_cur, dist_best, cores_target;
+
+ cores_target = emu_cores->per_node_target + extra;
+ dist_best = DIST_MAX;
+ node_best = NULL;
+ toptree_for_each(node, numa, NODE) {
+ /* Already pinned cores must use their nodes */
+ if (core_pinned_to_node_id(core) == node->id) {
+ node_best = node;
+ break;
+ }
+ /* Skip nodes that already have enough cores */
+ if (cores_pinned(node) >= cores_target)
+ continue;
+ dist_cur = dist_node_to_core(node, core);
+ if (dist_cur < dist_best) {
+ dist_best = dist_cur;
+ node_best = node;
+ }
+ }
+ return node_best;
+}
+
+/*
+ * Find the best node for each core with respect to "extra" core count
+ */
+static void toptree_to_numa_single(struct toptree *numa, struct toptree *phys,
+ int extra)
+{
+ struct toptree *node, *core, *tmp;
+
+ toptree_for_each_safe(core, tmp, phys, CORE) {
+ node = node_for_core(numa, core, extra);
+ if (!node)
+ return;
+ toptree_move(core, node);
+ pin_core_to_node(core->id, node->id);
+ }
+}
+
+/*
+ * Move structures of given level to specified NUMA node
+ */
+static void move_level_to_numa_node(struct toptree *node, struct toptree *phys,
+ enum toptree_level level, bool perfect)
+{
+ int cores_free, cores_target = emu_cores->per_node_target;
+ struct toptree *cur, *tmp;
+
+ toptree_for_each_safe(cur, tmp, phys, level) {
+ cores_free = cores_target - toptree_count(node, CORE);
+ if (perfect) {
+ if (cores_free == toptree_count(cur, CORE))
+ toptree_move(cur, node);
+ } else {
+ if (cores_free >= toptree_count(cur, CORE))
+ toptree_move(cur, node);
+ }
+ }
+}
+
+/*
+ * Move structures of a given level to NUMA nodes. If "perfect" is specified
+ * move only perfectly fitting structures. Otherwise move also smaller
+ * than needed structures.
+ */
+static void move_level_to_numa(struct toptree *numa, struct toptree *phys,
+ enum toptree_level level, bool perfect)
+{
+ struct toptree *node;
+
+ toptree_for_each(node, numa, NODE)
+ move_level_to_numa_node(node, phys, level, perfect);
+}
+
+/*
+ * For the first run try to move the big structures
+ */
+static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys)
+{
+ struct toptree *core;
+
+ /* Always try to move perfectly fitting structures first */
+ move_level_to_numa(numa, phys, BOOK, true);
+ move_level_to_numa(numa, phys, BOOK, false);
+ move_level_to_numa(numa, phys, MC, true);
+ move_level_to_numa(numa, phys, MC, false);
+ /* Now pin all the moved cores */
+ toptree_for_each(core, numa, CORE)
+ pin_core_to_node(core->id, core_node(core)->id);
+}
+
+/*
+ * Allocate new topology and create required nodes
+ */
+static struct toptree *toptree_new(int id, int nodes)
+{
+ struct toptree *tree;
+ int nid;
+
+ tree = toptree_alloc(TOPOLOGY, id);
+ if (!tree)
+ goto fail;
+ for (nid = 0; nid < nodes; nid++) {
+ if (!toptree_get_child(tree, nid))
+ goto fail;
+ }
+ return tree;
+fail:
+ panic("NUMA emulation could not allocate topology");
+}
+
+/*
+ * Allocate and initialize core to node mapping
+ */
+static void create_core_to_node_map(void)
+{
+ int i;
+
+ emu_cores = kzalloc(sizeof(*emu_cores), GFP_KERNEL);
+ if (emu_cores == NULL)
+ panic("Could not allocate cores to node memory");
+ for (i = 0; i < ARRAY_SIZE(emu_cores->to_node_id); i++)
+ emu_cores->to_node_id[i] = NODE_ID_FREE;
+}
+
+/*
+ * Move cores from physical topology into NUMA target topology
+ * and try to keep as much of the physical topology as possible.
+ */
+static struct toptree *toptree_to_numa(struct toptree *phys)
+{
+ static int first = 1;
+ struct toptree *numa;
+ int cores_total;
+
+ cores_total = emu_cores->total + cores_free(phys);
+ emu_cores->per_node_target = cores_total / emu_nodes;
+ numa = toptree_new(TOPTREE_ID_NUMA, emu_nodes);
+ if (first) {
+ toptree_to_numa_first(numa, phys);
+ first = 0;
+ }
+ toptree_to_numa_single(numa, phys, 0);
+ toptree_to_numa_single(numa, phys, 1);
+ toptree_unify_tree(numa);
+
+ WARN_ON(cpumask_weight(&phys->mask));
+ return numa;
+}
+
+/*
+ * Create a toptree out of the physical topology that we got from the hypervisor
+ */
+static struct toptree *toptree_from_topology(void)
+{
+ struct toptree *phys, *node, *book, *mc, *core;
+ struct cpu_topology_s390 *top;
+ int cpu;
+
+ phys = toptree_new(TOPTREE_ID_PHYS, 1);
+
+ for_each_online_cpu(cpu) {
+ top = &per_cpu(cpu_topology, cpu);
+ node = toptree_get_child(phys, 0);
+ book = toptree_get_child(node, top->book_id);
+ mc = toptree_get_child(book, top->socket_id);
+ core = toptree_get_child(mc, top->core_id);
+ if (!book || !mc || !core)
+ panic("NUMA emulation could not allocate memory");
+ cpumask_set_cpu(cpu, &core->mask);
+ toptree_update_mask(mc);
+ }
+ return phys;
+}
+
+/*
+ * Add toptree core to topology and create correct CPU masks
+ */
+static void topology_add_core(struct toptree *core)
+{
+ struct cpu_topology_s390 *top;
+ int cpu;
+
+ for_each_cpu(cpu, &core->mask) {
+ top = &per_cpu(cpu_topology, cpu);
+ cpumask_copy(&top->thread_mask, &core->mask);
+ cpumask_copy(&top->core_mask, &core_mc(core)->mask);
+ cpumask_copy(&top->book_mask, &core_book(core)->mask);
+ cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]);
+ top->node_id = core_node(core)->id;
+ }
+}
+
+/*
+ * Apply toptree to topology and create CPU masks
+ */
+static void toptree_to_topology(struct toptree *numa)
+{
+ struct toptree *core;
+ int i;
+
+ /* Clear all node masks */
+ for (i = 0; i < MAX_NUMNODES; i++)
+ cpumask_clear(&node_to_cpumask_map[i]);
+
+ /* Rebuild all masks */
+ toptree_for_each(core, numa, CORE)
+ topology_add_core(core);
+}
+
+/*
+ * Show the node to core mapping
+ */
+static void print_node_to_core_map(void)
+{
+ int nid, cid;
+
+ if (!numa_debug_enabled)
+ return;
+ printk(KERN_DEBUG "NUMA node to core mapping\n");
+ for (nid = 0; nid < emu_nodes; nid++) {
+ printk(KERN_DEBUG " node %3d: ", nid);
+ for (cid = 0; cid < ARRAY_SIZE(emu_cores->to_node_id); cid++) {
+ if (emu_cores->to_node_id[cid] == nid)
+ printk(KERN_CONT "%d ", cid);
+ }
+ printk(KERN_CONT "\n");
+ }
+}
+
+/*
+ * Transfer physical topology into a NUMA topology and modify CPU masks
+ * according to the NUMA topology.
+ *
+ * Must be called with "sched_domains_mutex" lock held.
+ */
+static void emu_update_cpu_topology(void)
+{
+ struct toptree *phys, *numa;
+
+ if (emu_cores == NULL)
+ create_core_to_node_map();
+ phys = toptree_from_topology();
+ numa = toptree_to_numa(phys);
+ toptree_free(phys);
+ toptree_to_topology(numa);
+ toptree_free(numa);
+ print_node_to_core_map();
+}
+
+/*
+ * If emu_size is not set, use CONFIG_EMU_SIZE. Then round to minimum
+ * alignment (needed for memory hotplug).
+ */
+static unsigned long emu_setup_size_adjust(unsigned long size)
+{
+ unsigned long size_new;
+
+ size = size ? : CONFIG_EMU_SIZE;
+ size_new = roundup(size, memory_block_size_bytes());
+ if (size_new == size)
+ return size;
+ pr_warn("Increasing memory stripe size from %ld MB to %ld MB\n",
+ size >> 20, size_new >> 20);
+ return size_new;
+}
+
+/*
+ * If we have not enough memory for the specified nodes, reduce the node count.
+ */
+static int emu_setup_nodes_adjust(int nodes)
+{
+ int nodes_max;
+
+ nodes_max = memblock.memory.total_size / emu_size;
+ nodes_max = max(nodes_max, 1);
+ if (nodes_max >= nodes)
+ return nodes;
+ pr_warn("Not enough memory for %d nodes, reducing node count\n", nodes);
+ return nodes_max;
+}
+
+/*
+ * Early emu setup
+ */
+static void emu_setup(void)
+{
+ emu_size = emu_setup_size_adjust(emu_size);
+ emu_nodes = emu_setup_nodes_adjust(emu_nodes);
+ pr_info("Creating %d nodes with memory stripe size %ld MB\n",
+ emu_nodes, emu_size >> 20);
+}
+
+/*
+ * Return node id for given page number
+ */
+static int emu_pfn_to_nid(unsigned long pfn)
+{
+ return (pfn / (emu_size >> PAGE_SHIFT)) % emu_nodes;
+}
+
+/*
+ * Return stripe size
+ */
+static unsigned long emu_align(void)
+{
+ return emu_size;
+}
+
+/*
+ * Return distance between two nodes
+ */
+static int emu_distance(int node1, int node2)
+{
+ return (node1 != node2) * EMU_NODE_DIST;
+}
+
+/*
+ * Define callbacks for generic s390 NUMA infrastructure
+ */
+const struct numa_mode numa_mode_emu = {
+ .name = "emu",
+ .setup = emu_setup,
+ .update_cpu_topology = emu_update_cpu_topology,
+ .__pfn_to_nid = emu_pfn_to_nid,
+ .align = emu_align,
+ .distance = emu_distance,
+};
+
+/*
+ * Kernel parameter: emu_nodes=<n>
+ */
+static int __init early_parse_emu_nodes(char *p)
+{
+ int count;
+
+ if (kstrtoint(p, 0, &count) != 0 || count <= 0)
+ return 0;
+ if (count <= 0)
+ return 0;
+ emu_nodes = min(count, MAX_NUMNODES);
+ return 0;
+}
+early_param("emu_nodes", early_parse_emu_nodes);
+
+/*
+ * Kernel parameter: emu_size=[<n>[k|M|G|T]]
+ */
+static int __init early_parse_emu_size(char *p)
+{
+ emu_size = memparse(p, NULL);
+ return 0;
+}
+early_param("emu_size", early_parse_emu_size);
diff --git a/kernel/arch/s390/numa/numa.c b/kernel/arch/s390/numa/numa.c
new file mode 100644
index 000000000..43f32ce60
--- /dev/null
+++ b/kernel/arch/s390/numa/numa.c
@@ -0,0 +1,184 @@
+/*
+ * NUMA support for s390
+ *
+ * Implement NUMA core code.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#define KMSG_COMPONENT "numa"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/mmzone.h>
+#include <linux/cpumask.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+#include <linux/node.h>
+
+#include <asm/numa.h>
+#include "numa_mode.h"
+
+pg_data_t *node_data[MAX_NUMNODES];
+EXPORT_SYMBOL(node_data);
+
+cpumask_t node_to_cpumask_map[MAX_NUMNODES];
+EXPORT_SYMBOL(node_to_cpumask_map);
+
+const struct numa_mode numa_mode_plain = {
+ .name = "plain",
+};
+
+static const struct numa_mode *mode = &numa_mode_plain;
+
+int numa_pfn_to_nid(unsigned long pfn)
+{
+ return mode->__pfn_to_nid ? mode->__pfn_to_nid(pfn) : 0;
+}
+
+void numa_update_cpu_topology(void)
+{
+ if (mode->update_cpu_topology)
+ mode->update_cpu_topology();
+}
+
+int __node_distance(int a, int b)
+{
+ return mode->distance ? mode->distance(a, b) : 0;
+}
+
+int numa_debug_enabled;
+
+/*
+ * alloc_node_data() - Allocate node data
+ */
+static __init pg_data_t *alloc_node_data(void)
+{
+ pg_data_t *res;
+
+ res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 1);
+ if (!res)
+ panic("Could not allocate memory for node data!\n");
+ memset(res, 0, sizeof(pg_data_t));
+ return res;
+}
+
+/*
+ * numa_setup_memory() - Assign bootmem to nodes
+ *
+ * The memory is first added to memblock without any respect to nodes.
+ * This is fixed before remaining memblock memory is handed over to the
+ * buddy allocator.
+ * An important side effect is that large bootmem allocations might easily
+ * cross node boundaries, which can be needed for large allocations with
+ * smaller memory stripes in each node (i.e. when using NUMA emulation).
+ *
+ * Memory defines nodes:
+ * Therefore this routine also sets the nodes online with memory.
+ */
+static void __init numa_setup_memory(void)
+{
+ unsigned long cur_base, align, end_of_dram;
+ int nid = 0;
+
+ end_of_dram = memblock_end_of_DRAM();
+ align = mode->align ? mode->align() : ULONG_MAX;
+
+ /*
+ * Step through all available memory and assign it to the nodes
+ * indicated by the mode implementation.
+ * All nodes which are seen here will be set online.
+ */
+ cur_base = 0;
+ do {
+ nid = numa_pfn_to_nid(PFN_DOWN(cur_base));
+ node_set_online(nid);
+ memblock_set_node(cur_base, align, &memblock.memory, nid);
+ cur_base += align;
+ } while (cur_base < end_of_dram);
+
+ /* Allocate and fill out node_data */
+ for (nid = 0; nid < MAX_NUMNODES; nid++)
+ NODE_DATA(nid) = alloc_node_data();
+
+ for_each_online_node(nid) {
+ unsigned long start_pfn, end_pfn;
+ unsigned long t_start, t_end;
+ int i;
+
+ start_pfn = ULONG_MAX;
+ end_pfn = 0;
+ for_each_mem_pfn_range(i, nid, &t_start, &t_end, NULL) {
+ if (t_start < start_pfn)
+ start_pfn = t_start;
+ if (t_end > end_pfn)
+ end_pfn = t_end;
+ }
+ NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+ NODE_DATA(nid)->node_id = nid;
+ }
+}
+
+/*
+ * numa_setup() - Earliest initialization
+ *
+ * Assign the mode and call the mode's setup routine.
+ */
+void __init numa_setup(void)
+{
+ pr_info("NUMA mode: %s\n", mode->name);
+ if (mode->setup)
+ mode->setup();
+ numa_setup_memory();
+ memblock_dump_all();
+}
+
+
+/*
+ * numa_init_early() - Initialization initcall
+ *
+ * This runs when only one CPU is online and before the first
+ * topology update is called for by the scheduler.
+ */
+static int __init numa_init_early(void)
+{
+ /* Attach all possible CPUs to node 0 for now. */
+ cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask);
+ return 0;
+}
+early_initcall(numa_init_early);
+
+/*
+ * numa_init_late() - Initialization initcall
+ *
+ * Register NUMA nodes.
+ */
+static int __init numa_init_late(void)
+{
+ int nid;
+
+ for_each_online_node(nid)
+ register_one_node(nid);
+ return 0;
+}
+device_initcall(numa_init_late);
+
+static int __init parse_debug(char *parm)
+{
+ numa_debug_enabled = 1;
+ return 0;
+}
+early_param("numa_debug", parse_debug);
+
+static int __init parse_numa(char *parm)
+{
+ if (strcmp(parm, numa_mode_plain.name) == 0)
+ mode = &numa_mode_plain;
+#ifdef CONFIG_NUMA_EMU
+ if (strcmp(parm, numa_mode_emu.name) == 0)
+ mode = &numa_mode_emu;
+#endif
+ return 0;
+}
+early_param("numa", parse_numa);
diff --git a/kernel/arch/s390/numa/numa_mode.h b/kernel/arch/s390/numa/numa_mode.h
new file mode 100644
index 000000000..08953b0b1
--- /dev/null
+++ b/kernel/arch/s390/numa/numa_mode.h
@@ -0,0 +1,24 @@
+/*
+ * NUMA support for s390
+ *
+ * Define declarations used for communication between NUMA mode
+ * implementations and NUMA core functionality.
+ *
+ * Copyright IBM Corp. 2015
+ */
+#ifndef __S390_NUMA_MODE_H
+#define __S390_NUMA_MODE_H
+
+struct numa_mode {
+ char *name; /* Name of mode */
+ void (*setup)(void); /* Initizalize mode */
+ void (*update_cpu_topology)(void); /* Called by topology code */
+ int (*__pfn_to_nid)(unsigned long pfn); /* PFN to node ID */
+ unsigned long (*align)(void); /* Minimum node alignment */
+ int (*distance)(int a, int b); /* Distance between two nodes */
+};
+
+extern const struct numa_mode numa_mode_plain;
+extern const struct numa_mode numa_mode_emu;
+
+#endif /* __S390_NUMA_MODE_H */
diff --git a/kernel/arch/s390/numa/toptree.c b/kernel/arch/s390/numa/toptree.c
new file mode 100644
index 000000000..902d350d8
--- /dev/null
+++ b/kernel/arch/s390/numa/toptree.c
@@ -0,0 +1,342 @@
+/*
+ * NUMA support for s390
+ *
+ * A tree structure used for machine topology mangling
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+#include <linux/list.h>
+#include <linux/list_sort.h>
+#include <linux/slab.h>
+#include <asm/numa.h>
+
+#include "toptree.h"
+
+/**
+ * toptree_alloc - Allocate and initialize a new tree node.
+ * @level: The node's vertical level; level 0 contains the leaves.
+ * @id: ID number, explicitly not unique beyond scope of node's siblings
+ *
+ * Allocate a new tree node and initialize it.
+ *
+ * RETURNS:
+ * Pointer to the new tree node or NULL on error
+ */
+struct toptree *toptree_alloc(int level, int id)
+{
+ struct toptree *res = kzalloc(sizeof(struct toptree), GFP_KERNEL);
+
+ if (!res)
+ return res;
+
+ INIT_LIST_HEAD(&res->children);
+ INIT_LIST_HEAD(&res->sibling);
+ cpumask_clear(&res->mask);
+ res->level = level;
+ res->id = id;
+ return res;
+}
+
+/**
+ * toptree_remove - Remove a tree node from a tree
+ * @cand: Pointer to the node to remove
+ *
+ * The node is detached from its parent node. The parent node's
+ * masks will be updated to reflect the loss of the child.
+ */
+static void toptree_remove(struct toptree *cand)
+{
+ struct toptree *oldparent;
+
+ list_del_init(&cand->sibling);
+ oldparent = cand->parent;
+ cand->parent = NULL;
+ toptree_update_mask(oldparent);
+}
+
+/**
+ * toptree_free - discard a tree node
+ * @cand: Pointer to the tree node to discard
+ *
+ * Checks if @cand is attached to a parent node. Detaches it
+ * cleanly using toptree_remove. Possible children are freed
+ * recursively. In the end @cand itself is freed.
+ */
+void toptree_free(struct toptree *cand)
+{
+ struct toptree *child, *tmp;
+
+ if (cand->parent)
+ toptree_remove(cand);
+ toptree_for_each_child_safe(child, tmp, cand)
+ toptree_free(child);
+ kfree(cand);
+}
+
+/**
+ * toptree_update_mask - Update node bitmasks
+ * @cand: Pointer to a tree node
+ *
+ * The node's cpumask will be updated by combining all children's
+ * masks. Then toptree_update_mask is called recursively for the
+ * parent if applicable.
+ *
+ * NOTE:
+ * This must not be called on leaves. If called on a leaf, its
+ * CPU mask is cleared and lost.
+ */
+void toptree_update_mask(struct toptree *cand)
+{
+ struct toptree *child;
+
+ cpumask_clear(&cand->mask);
+ list_for_each_entry(child, &cand->children, sibling)
+ cpumask_or(&cand->mask, &cand->mask, &child->mask);
+ if (cand->parent)
+ toptree_update_mask(cand->parent);
+}
+
+/**
+ * toptree_insert - Insert a tree node into tree
+ * @cand: Pointer to the node to insert
+ * @target: Pointer to the node to which @cand will added as a child
+ *
+ * Insert a tree node into a tree. Masks will be updated automatically.
+ *
+ * RETURNS:
+ * 0 on success, -1 if NULL is passed as argument or the node levels
+ * don't fit.
+ */
+static int toptree_insert(struct toptree *cand, struct toptree *target)
+{
+ if (!cand || !target)
+ return -1;
+ if (target->level != (cand->level + 1))
+ return -1;
+ list_add_tail(&cand->sibling, &target->children);
+ cand->parent = target;
+ toptree_update_mask(target);
+ return 0;
+}
+
+/**
+ * toptree_move_children - Move all child nodes of a node to a new place
+ * @cand: Pointer to the node whose children are to be moved
+ * @target: Pointer to the node to which @cand's children will be attached
+ *
+ * Take all child nodes of @cand and move them using toptree_move.
+ */
+static void toptree_move_children(struct toptree *cand, struct toptree *target)
+{
+ struct toptree *child, *tmp;
+
+ toptree_for_each_child_safe(child, tmp, cand)
+ toptree_move(child, target);
+}
+
+/**
+ * toptree_unify - Merge children with same ID
+ * @cand: Pointer to node whose direct children should be made unique
+ *
+ * When mangling the tree it is possible that a node has two or more children
+ * which have the same ID. This routine merges these children into one and
+ * moves all children of the merged nodes into the unified node.
+ */
+void toptree_unify(struct toptree *cand)
+{
+ struct toptree *child, *tmp, *cand_copy;
+
+ /* Threads cannot be split, cores are not split */
+ if (cand->level < 2)
+ return;
+
+ cand_copy = toptree_alloc(cand->level, 0);
+ toptree_for_each_child_safe(child, tmp, cand) {
+ struct toptree *tmpchild;
+
+ if (!cpumask_empty(&child->mask)) {
+ tmpchild = toptree_get_child(cand_copy, child->id);
+ toptree_move_children(child, tmpchild);
+ }
+ toptree_free(child);
+ }
+ toptree_move_children(cand_copy, cand);
+ toptree_free(cand_copy);
+
+ toptree_for_each_child(child, cand)
+ toptree_unify(child);
+}
+
+/**
+ * toptree_move - Move a node to another context
+ * @cand: Pointer to the node to move
+ * @target: Pointer to the node where @cand should go
+ *
+ * In the easiest case @cand is exactly on the level below @target
+ * and will be immediately moved to the target.
+ *
+ * If @target's level is not the direct parent level of @cand,
+ * nodes for the missing levels are created and put between
+ * @cand and @target. The "stacking" nodes' IDs are taken from
+ * @cand's parents.
+ *
+ * After this it is likely to have redundant nodes in the tree
+ * which are addressed by means of toptree_unify.
+ */
+void toptree_move(struct toptree *cand, struct toptree *target)
+{
+ struct toptree *stack_target, *real_insert_point, *ptr, *tmp;
+
+ if (cand->level + 1 == target->level) {
+ toptree_remove(cand);
+ toptree_insert(cand, target);
+ return;
+ }
+
+ real_insert_point = NULL;
+ ptr = cand;
+ stack_target = NULL;
+
+ do {
+ tmp = stack_target;
+ stack_target = toptree_alloc(ptr->level + 1,
+ ptr->parent->id);
+ toptree_insert(tmp, stack_target);
+ if (!real_insert_point)
+ real_insert_point = stack_target;
+ ptr = ptr->parent;
+ } while (stack_target->level < (target->level - 1));
+
+ toptree_remove(cand);
+ toptree_insert(cand, real_insert_point);
+ toptree_insert(stack_target, target);
+}
+
+/**
+ * toptree_get_child - Access a tree node's child by its ID
+ * @cand: Pointer to tree node whose child is to access
+ * @id: The desired child's ID
+ *
+ * @cand's children are searched for a child with matching ID.
+ * If no match can be found, a new child with the desired ID
+ * is created and returned.
+ */
+struct toptree *toptree_get_child(struct toptree *cand, int id)
+{
+ struct toptree *child;
+
+ toptree_for_each_child(child, cand)
+ if (child->id == id)
+ return child;
+ child = toptree_alloc(cand->level-1, id);
+ toptree_insert(child, cand);
+ return child;
+}
+
+/**
+ * toptree_first - Find the first descendant on specified level
+ * @context: Pointer to tree node whose descendants are to be used
+ * @level: The level of interest
+ *
+ * RETURNS:
+ * @context's first descendant on the specified level, or NULL
+ * if there is no matching descendant
+ */
+struct toptree *toptree_first(struct toptree *context, int level)
+{
+ struct toptree *child, *tmp;
+
+ if (context->level == level)
+ return context;
+
+ if (!list_empty(&context->children)) {
+ list_for_each_entry(child, &context->children, sibling) {
+ tmp = toptree_first(child, level);
+ if (tmp)
+ return tmp;
+ }
+ }
+ return NULL;
+}
+
+/**
+ * toptree_next_sibling - Return next sibling
+ * @cur: Pointer to a tree node
+ *
+ * RETURNS:
+ * If @cur has a parent and is not the last in the parent's children list,
+ * the next sibling is returned. Or NULL when there are no siblings left.
+ */
+static struct toptree *toptree_next_sibling(struct toptree *cur)
+{
+ if (cur->parent == NULL)
+ return NULL;
+
+ if (cur == list_last_entry(&cur->parent->children,
+ struct toptree, sibling))
+ return NULL;
+ return (struct toptree *) list_next_entry(cur, sibling);
+}
+
+/**
+ * toptree_next - Tree traversal function
+ * @cur: Pointer to current element
+ * @context: Pointer to the root node of the tree or subtree to
+ * be traversed.
+ * @level: The level of interest.
+ *
+ * RETURNS:
+ * Pointer to the next node on level @level
+ * or NULL when there is no next node.
+ */
+struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
+ int level)
+{
+ struct toptree *cur_context, *tmp;
+
+ if (!cur)
+ return NULL;
+
+ if (context->level == level)
+ return NULL;
+
+ tmp = toptree_next_sibling(cur);
+ if (tmp != NULL)
+ return tmp;
+
+ cur_context = cur;
+ while (cur_context->level < context->level - 1) {
+ /* Step up */
+ cur_context = cur_context->parent;
+ /* Step aside */
+ tmp = toptree_next_sibling(cur_context);
+ if (tmp != NULL) {
+ /* Step down */
+ tmp = toptree_first(tmp, level);
+ if (tmp != NULL)
+ return tmp;
+ }
+ }
+ return NULL;
+}
+
+/**
+ * toptree_count - Count descendants on specified level
+ * @context: Pointer to node whose descendants are to be considered
+ * @level: Only descendants on the specified level will be counted
+ *
+ * RETURNS:
+ * Number of descendants on the specified level
+ */
+int toptree_count(struct toptree *context, int level)
+{
+ struct toptree *cur;
+ int cnt = 0;
+
+ toptree_for_each(cur, context, level)
+ cnt++;
+ return cnt;
+}
diff --git a/kernel/arch/s390/numa/toptree.h b/kernel/arch/s390/numa/toptree.h
new file mode 100644
index 000000000..bdf502027
--- /dev/null
+++ b/kernel/arch/s390/numa/toptree.h
@@ -0,0 +1,60 @@
+/*
+ * NUMA support for s390
+ *
+ * A tree structure used for machine topology mangling
+ *
+ * Copyright IBM Corp. 2015
+ */
+#ifndef S390_TOPTREE_H
+#define S390_TOPTREE_H
+
+#include <linux/cpumask.h>
+#include <linux/list.h>
+
+struct toptree {
+ int level;
+ int id;
+ cpumask_t mask;
+ struct toptree *parent;
+ struct list_head sibling;
+ struct list_head children;
+};
+
+struct toptree *toptree_alloc(int level, int id);
+void toptree_free(struct toptree *cand);
+void toptree_update_mask(struct toptree *cand);
+void toptree_unify(struct toptree *cand);
+struct toptree *toptree_get_child(struct toptree *cand, int id);
+void toptree_move(struct toptree *cand, struct toptree *target);
+int toptree_count(struct toptree *context, int level);
+
+struct toptree *toptree_first(struct toptree *context, int level);
+struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
+ int level);
+
+#define toptree_for_each_child(child, ptree) \
+ list_for_each_entry(child, &ptree->children, sibling)
+
+#define toptree_for_each_child_safe(child, ptmp, ptree) \
+ list_for_each_entry_safe(child, ptmp, &ptree->children, sibling)
+
+#define toptree_is_last(ptree) \
+ ((ptree->parent == NULL) || \
+ (ptree->parent->children.prev == &ptree->sibling))
+
+#define toptree_for_each(ptree, cont, ttype) \
+ for (ptree = toptree_first(cont, ttype); \
+ ptree != NULL; \
+ ptree = toptree_next(ptree, cont, ttype))
+
+#define toptree_for_each_safe(ptree, tmp, cont, ttype) \
+ for (ptree = toptree_first(cont, ttype), \
+ tmp = toptree_next(ptree, cont, ttype); \
+ ptree != NULL; \
+ ptree = tmp, \
+ tmp = toptree_next(ptree, cont, ttype))
+
+#define toptree_for_each_sibling(ptree, start) \
+ toptree_for_each(ptree, start->parent, start->level)
+
+#endif /* S390_TOPTREE_H */
diff --git a/kernel/arch/s390/oprofile/init.c b/kernel/arch/s390/oprofile/init.c
index bc927a09a..9cfa2ffaa 100644
--- a/kernel/arch/s390/oprofile/init.c
+++ b/kernel/arch/s390/oprofile/init.c
@@ -16,6 +16,7 @@
#include <linux/fs.h>
#include <linux/module.h>
#include <asm/processor.h>
+#include <asm/perf_event.h>
#include "../../../drivers/oprofile/oprof.h"
diff --git a/kernel/arch/s390/pci/pci.c b/kernel/arch/s390/pci/pci.c
index 598f023cf..7ef12a3ac 100644
--- a/kernel/arch/s390/pci/pci.c
+++ b/kernel/arch/s390/pci/pci.c
@@ -76,11 +76,6 @@ EXPORT_SYMBOL_GPL(zpci_iomap_start);
static struct kmem_cache *zdev_fmb_cache;
-struct zpci_dev *get_zdev(struct pci_dev *pdev)
-{
- return (struct zpci_dev *) pdev->sysdata;
-}
-
struct zpci_dev *get_zdev_by_fid(u32 fid)
{
struct zpci_dev *tmp, *zdev = NULL;
@@ -269,7 +264,7 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
unsigned long offset,
unsigned long max)
{
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
u64 addr;
int idx;
@@ -385,7 +380,7 @@ static void zpci_irq_handler(struct airq_struct *airq)
int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
unsigned int hwirq, msi_vecs;
unsigned long aisb;
struct msi_desc *msi;
@@ -414,7 +409,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
/* Request MSI interrupts */
hwirq = 0;
- list_for_each_entry(msi, &pdev->msi_list, list) {
+ for_each_pci_msi_entry(msi, pdev) {
rc = -EIO;
irq = irq_alloc_desc(0); /* Alloc irq on node 0 */
if (irq < 0)
@@ -440,7 +435,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
return (msi_vecs == nvec) ? 0 : msi_vecs;
out_msi:
- list_for_each_entry(msi, &pdev->msi_list, list) {
+ for_each_pci_msi_entry(msi, pdev) {
if (hwirq-- == 0)
break;
irq_set_msi_desc(msi->irq, NULL);
@@ -460,7 +455,7 @@ out:
void arch_teardown_msi_irqs(struct pci_dev *pdev)
{
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
struct msi_desc *msi;
int rc;
@@ -470,7 +465,7 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
return;
/* Release MSI interrupts */
- list_for_each_entry(msi, &pdev->msi_list, list) {
+ for_each_pci_msi_entry(msi, pdev) {
if (msi->msi_attrib.is_msix)
__pci_msix_desc_mask_irq(msi, 1);
else
@@ -637,7 +632,7 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
int i;
for (i = 0; i < PCI_BAR_COUNT; i++) {
- if (!zdev->bars[i].size)
+ if (!zdev->bars[i].size || !zdev->bars[i].res)
continue;
zpci_free_iomap(zdev, zdev->bars[i].map_idx);
@@ -648,7 +643,7 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
int pcibios_add_device(struct pci_dev *pdev)
{
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
struct resource *res;
int i;
@@ -673,7 +668,7 @@ void pcibios_release_device(struct pci_dev *pdev)
int pcibios_enable_device(struct pci_dev *pdev, int mask)
{
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
zdev->pdev = pdev;
zpci_debug_init_device(zdev);
@@ -684,7 +679,7 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask)
void pcibios_disable_device(struct pci_dev *pdev)
{
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
zpci_fmb_disable_device(zdev);
zpci_debug_exit_device(zdev);
@@ -695,7 +690,7 @@ void pcibios_disable_device(struct pci_dev *pdev)
static int zpci_restore(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
int ret = 0;
if (zdev->state != ZPCI_FN_STATE_ONLINE)
@@ -717,7 +712,7 @@ out:
static int zpci_freeze(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
if (zdev->state != ZPCI_FN_STATE_ONLINE)
return 0;
@@ -777,17 +772,22 @@ static int zpci_scan_bus(struct zpci_dev *zdev)
ret = zpci_setup_bus_resources(zdev, &resources);
if (ret)
- return ret;
+ goto error;
zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
zdev, &resources);
if (!zdev->bus) {
- zpci_cleanup_bus_resources(zdev);
- return -EIO;
+ ret = -EIO;
+ goto error;
}
zdev->bus->max_bus_speed = zdev->max_bus_speed;
pci_bus_add_devices(zdev->bus);
return 0;
+
+error:
+ zpci_cleanup_bus_resources(zdev);
+ pci_free_resource_list(&resources);
+ return ret;
}
int zpci_enable_device(struct zpci_dev *zdev)
diff --git a/kernel/arch/s390/pci/pci_dma.c b/kernel/arch/s390/pci/pci_dma.c
index 6fd8d5836..d348f2c09 100644
--- a/kernel/arch/s390/pci/pci_dma.c
+++ b/kernel/arch/s390/pci/pci_dma.c
@@ -24,7 +24,7 @@ static int zpci_refresh_global(struct zpci_dev *zdev)
zdev->iommu_pages * PAGE_SIZE);
}
-static unsigned long *dma_alloc_cpu_table(void)
+unsigned long *dma_alloc_cpu_table(void)
{
unsigned long *table, *entry;
@@ -33,7 +33,7 @@ static unsigned long *dma_alloc_cpu_table(void)
return NULL;
for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
- *entry = ZPCI_TABLE_INVALID | ZPCI_TABLE_PROTECTED;
+ *entry = ZPCI_TABLE_INVALID;
return table;
}
@@ -51,7 +51,7 @@ static unsigned long *dma_alloc_page_table(void)
return NULL;
for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
- *entry = ZPCI_PTE_INVALID | ZPCI_TABLE_PROTECTED;
+ *entry = ZPCI_PTE_INVALID;
return table;
}
@@ -95,7 +95,7 @@ static unsigned long *dma_get_page_table_origin(unsigned long *entry)
return pto;
}
-static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
+unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
{
unsigned long *sto, *pto;
unsigned int rtx, sx, px;
@@ -114,20 +114,10 @@ static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr
return &pto[px];
}
-static void dma_update_cpu_trans(struct zpci_dev *zdev, void *page_addr,
- dma_addr_t dma_addr, int flags)
+void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
{
- unsigned long *entry;
-
- entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
- if (!entry) {
- WARN_ON_ONCE(1);
- return;
- }
-
if (flags & ZPCI_PTE_INVALID) {
invalidate_pt_entry(entry);
- return;
} else {
set_pt_pfaa(entry, page_addr);
validate_pt_entry(entry);
@@ -146,17 +136,25 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
u8 *page_addr = (u8 *) (pa & PAGE_MASK);
dma_addr_t start_dma_addr = dma_addr;
unsigned long irq_flags;
+ unsigned long *entry;
int i, rc = 0;
if (!nr_pages)
return -EINVAL;
spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
- if (!zdev->dma_table)
+ if (!zdev->dma_table) {
+ rc = -EINVAL;
goto no_refresh;
+ }
for (i = 0; i < nr_pages; i++) {
- dma_update_cpu_trans(zdev, page_addr, dma_addr, flags);
+ entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+ if (!entry) {
+ rc = -ENOMEM;
+ goto undo_cpu_trans;
+ }
+ dma_update_cpu_trans(entry, page_addr, flags);
page_addr += PAGE_SIZE;
dma_addr += PAGE_SIZE;
}
@@ -175,13 +173,25 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
nr_pages * PAGE_SIZE);
+undo_cpu_trans:
+ if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
+ flags = ZPCI_PTE_INVALID;
+ while (i-- > 0) {
+ page_addr -= PAGE_SIZE;
+ dma_addr -= PAGE_SIZE;
+ entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+ if (!entry)
+ break;
+ dma_update_cpu_trans(entry, page_addr, flags);
+ }
+ }
no_refresh:
spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
return rc;
}
-static void dma_free_seg_table(unsigned long entry)
+void dma_free_seg_table(unsigned long entry)
{
unsigned long *sto = get_rt_sto(entry);
int sx;
@@ -193,21 +203,18 @@ static void dma_free_seg_table(unsigned long entry)
dma_free_cpu_table(sto);
}
-static void dma_cleanup_tables(struct zpci_dev *zdev)
+void dma_cleanup_tables(unsigned long *table)
{
- unsigned long *table;
int rtx;
- if (!zdev || !zdev->dma_table)
+ if (!table)
return;
- table = zdev->dma_table;
for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
if (reg_entry_isvalid(table[rtx]))
dma_free_seg_table(table[rtx]);
dma_free_cpu_table(table);
- zdev->dma_table = NULL;
}
static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
@@ -262,53 +269,60 @@ out:
spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
}
-int dma_set_mask(struct device *dev, u64 mask)
+static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
{
- if (!dev->dma_mask || !dma_supported(dev, mask))
- return -EIO;
+ struct {
+ unsigned long rc;
+ unsigned long addr;
+ } __packed data = {rc, addr};
- *dev->dma_mask = mask;
- return 0;
+ zpci_err_hex(&data, sizeof(data));
}
-EXPORT_SYMBOL_GPL(dma_set_mask);
static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
- struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
unsigned long nr_pages, iommu_page_index;
unsigned long pa = page_to_phys(page) + offset;
int flags = ZPCI_PTE_VALID;
dma_addr_t dma_addr;
+ int ret;
/* This rounds up number of pages based on size and offset */
nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
iommu_page_index = dma_alloc_iommu(zdev, nr_pages);
- if (iommu_page_index == -1)
+ if (iommu_page_index == -1) {
+ ret = -ENOSPC;
goto out_err;
+ }
/* Use rounded up size */
size = nr_pages * PAGE_SIZE;
dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE;
- if (dma_addr + size > zdev->end_dma)
+ if (dma_addr + size > zdev->end_dma) {
+ ret = -ERANGE;
goto out_free;
+ }
if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
flags |= ZPCI_TABLE_PROTECTED;
- if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) {
- atomic64_add(nr_pages, &zdev->mapped_pages);
- return dma_addr + (offset & ~PAGE_MASK);
- }
+ ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
+ if (ret)
+ goto out_free;
+
+ atomic64_add(nr_pages, &zdev->mapped_pages);
+ return dma_addr + (offset & ~PAGE_MASK);
out_free:
dma_free_iommu(zdev, iommu_page_index, nr_pages);
out_err:
zpci_err("map error:\n");
- zpci_err_hex(&pa, sizeof(pa));
+ zpci_err_dma(ret, pa);
return DMA_ERROR_CODE;
}
@@ -316,16 +330,18 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction direction,
struct dma_attrs *attrs)
{
- struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
unsigned long iommu_page_index;
- int npages;
+ int npages, ret;
npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
dma_addr = dma_addr & PAGE_MASK;
- if (dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
- ZPCI_TABLE_PROTECTED | ZPCI_PTE_INVALID)) {
+ ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
+ ZPCI_PTE_INVALID);
+ if (ret) {
zpci_err("unmap error:\n");
- zpci_err_hex(&dma_addr, sizeof(dma_addr));
+ zpci_err_dma(ret, dma_addr);
+ return;
}
atomic64_add(npages, &zdev->unmapped_pages);
@@ -337,7 +353,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag,
struct dma_attrs *attrs)
{
- struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
struct page *page;
unsigned long pa;
dma_addr_t map;
@@ -367,7 +383,7 @@ static void s390_dma_free(struct device *dev, size_t size,
void *pa, dma_addr_t dma_handle,
struct dma_attrs *attrs)
{
- struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
size = PAGE_ALIGN(size);
atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
@@ -426,6 +442,13 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
{
int rc;
+ /*
+ * At this point, if the device is part of an IOMMU domain, this would
+ * be a strong hint towards a bug in the IOMMU API (common) code and/or
+ * simultaneous access via IOMMU and DMA API. So let's issue a warning.
+ */
+ WARN_ON(zdev->s390_domain);
+
spin_lock_init(&zdev->iommu_bitmap_lock);
spin_lock_init(&zdev->dma_table_lock);
@@ -460,8 +483,16 @@ out_clean:
void zpci_dma_exit_device(struct zpci_dev *zdev)
{
+ /*
+ * At this point, if the device is part of an IOMMU domain, this would
+ * be a strong hint towards a bug in the IOMMU API (common) code and/or
+ * simultaneous access via IOMMU and DMA API. So let's issue a warning.
+ */
+ WARN_ON(zdev->s390_domain);
+
zpci_unregister_ioat(zdev, 0);
- dma_cleanup_tables(zdev);
+ dma_cleanup_tables(zdev->dma_table);
+ zdev->dma_table = NULL;
vfree(zdev->iommu_bitmap);
zdev->iommu_bitmap = NULL;
zdev->next_bit = 0;
diff --git a/kernel/arch/s390/pci/pci_event.c b/kernel/arch/s390/pci/pci_event.c
index 460fdb21c..369a3e05d 100644
--- a/kernel/arch/s390/pci/pci_event.c
+++ b/kernel/arch/s390/pci/pci_event.c
@@ -46,15 +46,13 @@ struct zpci_ccdf_avail {
static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
+ struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
zpci_err("error CCDF:\n");
zpci_err_hex(ccdf, sizeof(*ccdf));
- if (!zdev)
- return;
-
pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
- pci_name(zdev->pdev), ccdf->pec, ccdf->fid);
+ pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
}
void zpci_event_error(void *data)
@@ -75,7 +73,13 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
zpci_err_hex(ccdf, sizeof(*ccdf));
switch (ccdf->pec) {
- case 0x0301: /* Standby -> Configured */
+ case 0x0301: /* Reserved|Standby -> Configured */
+ if (!zdev) {
+ ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
+ if (ret)
+ break;
+ zdev = get_zdev_by_fid(ccdf->fid);
+ }
if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY)
break;
zdev->state = ZPCI_FN_STATE_CONFIGURED;
@@ -83,7 +87,9 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
ret = zpci_enable_device(zdev);
if (ret)
break;
+ pci_lock_rescan_remove();
pci_rescan_bus(zdev->bus);
+ pci_unlock_rescan_remove();
break;
case 0x0302: /* Reserved -> Standby */
if (!zdev)
@@ -91,7 +97,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
break;
case 0x0303: /* Deconfiguration requested */
if (pdev)
- pci_stop_and_remove_bus_device(pdev);
+ pci_stop_and_remove_bus_device_locked(pdev);
ret = zpci_disable_device(zdev);
if (ret)
@@ -108,7 +114,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
/* Give the driver a hint that the function is
* already unusable. */
pdev->error_state = pci_channel_io_perm_failure;
- pci_stop_and_remove_bus_device(pdev);
+ pci_stop_and_remove_bus_device_locked(pdev);
}
zdev->fh = ccdf->fh;
diff --git a/kernel/arch/s390/pci/pci_insn.c b/kernel/arch/s390/pci/pci_insn.c
index 85267c058..10ca15dca 100644
--- a/kernel/arch/s390/pci/pci_insn.c
+++ b/kernel/arch/s390/pci/pci_insn.c
@@ -8,10 +8,23 @@
#include <linux/errno.h>
#include <linux/delay.h>
#include <asm/pci_insn.h>
+#include <asm/pci_debug.h>
#include <asm/processor.h>
#define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */
+static inline void zpci_err_insn(u8 cc, u8 status, u64 req, u64 offset)
+{
+ struct {
+ u64 req;
+ u64 offset;
+ u8 cc;
+ u8 status;
+ } __packed data = {req, offset, cc, status};
+
+ zpci_err_hex(&data, sizeof(data));
+}
+
/* Modify PCI Function Controls */
static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
{
@@ -38,8 +51,8 @@ int zpci_mod_fc(u64 req, struct zpci_fib *fib)
} while (cc == 2);
if (cc)
- printk_once(KERN_ERR "%s: error cc: %d status: %d\n",
- __func__, cc, status);
+ zpci_err_insn(cc, status, req, 0);
+
return (cc) ? -EIO : 0;
}
@@ -72,8 +85,8 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
} while (cc == 2);
if (cc)
- printk_once(KERN_ERR "%s: error cc: %d status: %d dma_addr: %Lx size: %Lx\n",
- __func__, cc, status, addr, range);
+ zpci_err_insn(cc, status, addr, range);
+
return (cc) ? -EIO : 0;
}
@@ -121,8 +134,8 @@ int zpci_load(u64 *data, u64 req, u64 offset)
} while (cc == 2);
if (cc)
- printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n",
- __func__, cc, status, req, offset);
+ zpci_err_insn(cc, status, req, offset);
+
return (cc > 0) ? -EIO : cc;
}
EXPORT_SYMBOL_GPL(zpci_load);
@@ -159,8 +172,8 @@ int zpci_store(u64 data, u64 req, u64 offset)
} while (cc == 2);
if (cc)
- printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n",
- __func__, cc, status, req, offset);
+ zpci_err_insn(cc, status, req, offset);
+
return (cc > 0) ? -EIO : cc;
}
EXPORT_SYMBOL_GPL(zpci_store);
@@ -195,8 +208,8 @@ int zpci_store_block(const u64 *data, u64 req, u64 offset)
} while (cc == 2);
if (cc)
- printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n",
- __func__, cc, status, req, offset);
+ zpci_err_insn(cc, status, req, offset);
+
return (cc > 0) ? -EIO : cc;
}
EXPORT_SYMBOL_GPL(zpci_store_block);
diff --git a/kernel/arch/s390/pci/pci_sysfs.c b/kernel/arch/s390/pci/pci_sysfs.c
index fa3ce891e..f37a58088 100644
--- a/kernel/arch/s390/pci/pci_sysfs.c
+++ b/kernel/arch/s390/pci/pci_sysfs.c
@@ -16,7 +16,7 @@
static ssize_t name##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
{ \
- struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); \
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); \
\
return sprintf(buf, fmt, zdev->member); \
} \
@@ -38,23 +38,30 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct pci_dev *pdev = to_pci_dev(dev);
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
int ret;
if (!device_remove_file_self(dev, attr))
return count;
+ pci_lock_rescan_remove();
pci_stop_and_remove_bus_device(pdev);
ret = zpci_disable_device(zdev);
if (ret)
- return ret;
+ goto error;
ret = zpci_enable_device(zdev);
if (ret)
- return ret;
+ goto error;
pci_rescan_bus(zdev->bus);
+ pci_unlock_rescan_remove();
+
return count;
+
+error:
+ pci_unlock_rescan_remove();
+ return ret;
}
static DEVICE_ATTR_WO(recover);
@@ -64,7 +71,7 @@ static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct pci_dev *pdev = to_pci_dev(dev);
- struct zpci_dev *zdev = get_zdev(pdev);
+ struct zpci_dev *zdev = to_zpci(pdev);
return memory_read_from_buffer(buf, count, &off, zdev->util_str,
sizeof(zdev->util_str));