Diffstat (limited to 'arch/s390')
-rw-r--r--  arch/s390/Kconfig | 17
-rw-r--r--  arch/s390/Makefile | 10
-rw-r--r--  arch/s390/boot/startup.c | 16
-rw-r--r--  arch/s390/boot/uv.c | 11
-rw-r--r--  arch/s390/boot/uv.h | 7
-rw-r--r--  arch/s390/boot/version.c | 1
-rw-r--r--  arch/s390/boot/vmlinux.lds.S | 13
-rw-r--r--  arch/s390/configs/debug_defconfig | 53
-rw-r--r--  arch/s390/configs/defconfig | 49
-rw-r--r--  arch/s390/configs/zfcpdump_defconfig | 7
-rw-r--r--  arch/s390/crypto/Kconfig | 135
-rw-r--r--  arch/s390/crypto/Makefile | 2
-rw-r--r--  arch/s390/crypto/aes_s390.c | 2
-rw-r--r--  arch/s390/crypto/arch_random.c | 217
-rw-r--r--  arch/s390/crypto/chacha-glue.c | 2
-rw-r--r--  arch/s390/crypto/crc32-vx.c | 2
-rw-r--r--  arch/s390/crypto/des_s390.c | 2
-rw-r--r--  arch/s390/crypto/ghash_s390.c | 2
-rw-r--r--  arch/s390/crypto/prng.c | 2
-rw-r--r--  arch/s390/crypto/sha1_s390.c | 2
-rw-r--r--  arch/s390/crypto/sha256_s390.c | 2
-rw-r--r--  arch/s390/crypto/sha3_256_s390.c | 2
-rw-r--r--  arch/s390/crypto/sha3_512_s390.c | 2
-rw-r--r--  arch/s390/crypto/sha512_s390.c | 2
-rw-r--r--  arch/s390/hypfs/hypfs_diag.c | 2
-rw-r--r--  arch/s390/hypfs/inode.c | 2
-rw-r--r--  arch/s390/include/asm/abs_lowcore.h | 17
-rw-r--r--  arch/s390/include/asm/airq.h | 7
-rw-r--r--  arch/s390/include/asm/ap.h | 6
-rw-r--r--  arch/s390/include/asm/archrandom.h | 40
-rw-r--r--  arch/s390/include/asm/bitops.h | 63
-rw-r--r--  arch/s390/include/asm/ccwdev.h | 1
-rw-r--r--  arch/s390/include/asm/cpufeature.h | 23
-rw-r--r--  arch/s390/include/asm/ctl_reg.h | 3
-rw-r--r--  arch/s390/include/asm/dma.h | 6
-rw-r--r--  arch/s390/include/asm/futex.h | 3
-rw-r--r--  arch/s390/include/asm/gmap.h | 39
-rw-r--r--  arch/s390/include/asm/hugetlb.h | 6
-rw-r--r--  arch/s390/include/asm/jump_label.h | 5
-rw-r--r--  arch/s390/include/asm/kexec.h | 14
-rw-r--r--  arch/s390/include/asm/kvm_host.h | 39
-rw-r--r--  arch/s390/include/asm/lowcore.h | 4
-rw-r--r--  arch/s390/include/asm/maccess.h | 17
-rw-r--r--  arch/s390/include/asm/mmu.h | 16
-rw-r--r--  arch/s390/include/asm/mmu_context.h | 2
-rw-r--r--  arch/s390/include/asm/nospec-insn.h | 2
-rw-r--r--  arch/s390/include/asm/os_info.h | 3
-rw-r--r--  arch/s390/include/asm/pai.h | 6
-rw-r--r--  arch/s390/include/asm/pci.h | 13
-rw-r--r--  arch/s390/include/asm/pci_clp.h | 9
-rw-r--r--  arch/s390/include/asm/pci_insn.h | 29
-rw-r--r--  arch/s390/include/asm/pgtable.h | 42
-rw-r--r--  arch/s390/include/asm/processor.h | 20
-rw-r--r--  arch/s390/include/asm/qdio.h | 6
-rw-r--r--  arch/s390/include/asm/sclp.h | 8
-rw-r--r--  arch/s390/include/asm/scsw.h | 5
-rw-r--r--  arch/s390/include/asm/smp.h | 4
-rw-r--r--  arch/s390/include/asm/softirq_stack.h | 3
-rw-r--r--  arch/s390/include/asm/termios.h | 26
-rw-r--r--  arch/s390/include/asm/tlb.h | 3
-rw-r--r--  arch/s390/include/asm/tpi.h | 13
-rw-r--r--  arch/s390/include/asm/uaccess.h | 5
-rw-r--r--  arch/s390/include/asm/unwind.h | 2
-rw-r--r--  arch/s390/include/asm/uv.h | 51
-rw-r--r--  arch/s390/include/uapi/asm/dasd.h | 14
-rw-r--r--  arch/s390/include/uapi/asm/hwctrset.h | 6
-rw-r--r--  arch/s390/include/uapi/asm/kvm.h | 1
-rw-r--r--  arch/s390/include/uapi/asm/termios.h | 50
-rw-r--r--  arch/s390/kernel/Makefile | 10
-rw-r--r--  arch/s390/kernel/abs_lowcore.c | 95
-rw-r--r--  arch/s390/kernel/cpufeature.c | 46
-rw-r--r--  arch/s390/kernel/crash_dump.c | 102
-rw-r--r--  arch/s390/kernel/debug.c | 2
-rw-r--r--  arch/s390/kernel/early.c | 2
-rw-r--r--  arch/s390/kernel/ipl.c | 9
-rw-r--r--  arch/s390/kernel/jump_label.c | 28
-rw-r--r--  arch/s390/kernel/machine_kexec.c | 8
-rw-r--r--  arch/s390/kernel/machine_kexec_file.c | 18
-rw-r--r--  arch/s390/kernel/module.c | 1
-rw-r--r--  arch/s390/kernel/nmi.c | 10
-rw-r--r--  arch/s390/kernel/os_info.c | 10
-rw-r--r--  arch/s390/kernel/perf_cpum_cf.c | 23
-rw-r--r--  arch/s390/kernel/perf_pai_crypto.c | 21
-rw-r--r--  arch/s390/kernel/perf_pai_ext.c | 672
-rw-r--r--  arch/s390/kernel/process.c | 26
-rw-r--r--  arch/s390/kernel/processor.c | 10
-rw-r--r--  arch/s390/kernel/setup.c | 54
-rw-r--r--  arch/s390/kernel/smp.c | 97
-rw-r--r--  arch/s390/kernel/uv.c | 156
-rw-r--r--  arch/s390/kernel/vdso.c | 5
-rw-r--r--  arch/s390/kernel/vmlinux.lds.S | 1
-rw-r--r--  arch/s390/kvm/Kconfig | 1
-rw-r--r--  arch/s390/kvm/Makefile | 1
-rw-r--r--  arch/s390/kvm/gaccess.c | 112
-rw-r--r--  arch/s390/kvm/gaccess.h | 6
-rw-r--r--  arch/s390/kvm/intercept.c | 15
-rw-r--r--  arch/s390/kvm/interrupt.c | 98
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 484
-rw-r--r--  arch/s390/kvm/kvm-s390.h | 16
-rw-r--r--  arch/s390/kvm/pci.c | 702
-rw-r--r--  arch/s390/kvm/pci.h | 87
-rw-r--r--  arch/s390/kvm/priv.c | 26
-rw-r--r--  arch/s390/kvm/pv.c | 269
-rw-r--r--  arch/s390/kvm/sigp.c | 4
-rw-r--r--  arch/s390/kvm/vsie.c | 8
-rw-r--r--  arch/s390/lib/Makefile | 3
-rw-r--r--  arch/s390/lib/delay.c | 11
-rw-r--r--  arch/s390/lib/expoline/Makefile | 3
-rw-r--r--  arch/s390/lib/expoline/expoline.S (renamed from arch/s390/lib/expoline.S) | 0
-rw-r--r--  arch/s390/lib/uaccess.c | 9
-rw-r--r--  arch/s390/mm/dump_pagetables.c | 20
-rw-r--r--  arch/s390/mm/fault.c | 54
-rw-r--r--  arch/s390/mm/gmap.c | 183
-rw-r--r--  arch/s390/mm/hugetlbpage.c | 10
-rw-r--r--  arch/s390/mm/init.c | 6
-rw-r--r--  arch/s390/mm/maccess.c | 201
-rw-r--r--  arch/s390/mm/mmap.c | 22
-rw-r--r--  arch/s390/mm/vmem.c | 104
-rw-r--r--  arch/s390/pci/Makefile | 2
-rw-r--r--  arch/s390/pci/pci.c | 16
-rw-r--r--  arch/s390/pci/pci_bus.c | 82
-rw-r--r--  arch/s390/pci/pci_clp.c | 7
-rw-r--r--  arch/s390/pci/pci_dma.c | 2
-rw-r--r--  arch/s390/pci/pci_insn.c | 4
-rw-r--r--  arch/s390/pci/pci_irq.c | 48
-rw-r--r--  arch/s390/pci/pci_kvm_hook.c | 11
-rw-r--r--  arch/s390/pci/pci_mmio.c | 8
-rw-r--r--  arch/s390/purgatory/Makefile | 5
-rw-r--r--  arch/s390/tools/gen_facilities.c | 1
129 files changed, 4079 insertions, 1164 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 91c0b80a8bf0..318fce77601d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -204,6 +204,7 @@ config S390
select IOMMU_SUPPORT if PCI
select MMU_GATHER_NO_GATHER
select MMU_GATHER_RCU_TABLE_FREE
+ select MMU_GATHER_MERGE_VMAS
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE if PCI
select NEED_SG_DMA_LENGTH if PCI
@@ -484,7 +485,6 @@ config KEXEC
config KEXEC_FILE
bool "kexec file based system call"
select KEXEC_CORE
- select BUILD_BIN2C
depends on CRYPTO
depends on CRYPTO_SHA256
depends on CRYPTO_SHA256_S390
@@ -508,21 +508,6 @@ config KEXEC_SIG
verification for the corresponding kernel image type being
loaded in order for this to work.
-config ARCH_RANDOM
- def_bool y
- prompt "s390 architectural random number generation API"
- help
- Enable the s390 architectural random number generation API
- to provide random data for all consumers within the Linux
- kernel.
-
- When enabled the arch_random_* functions declared in linux/random.h
- are implemented. The implementation is based on the s390 CPACF
- instruction subfunction TRNG which provides a real true random
- number generator.
-
- If unsure, say Y.
-
config KERNEL_NOBP
def_bool n
prompt "Enable modified branch prediction for the kernel by default"
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 495c68a4df1e..de6d8b2ea4d8 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -82,7 +82,7 @@ endif
ifdef CONFIG_EXPOLINE
ifdef CONFIG_EXPOLINE_EXTERN
- KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline.o
+ KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline/expoline.o
CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern
CC_FLAGS_EXPOLINE += -mfunction-return=thunk-extern
else
@@ -119,8 +119,6 @@ export KBUILD_CFLAGS_DECOMPRESSOR
OBJCOPYFLAGS := -O binary
-head-y := arch/s390/kernel/head64.o
-
libs-y += arch/s390/lib/
drivers-y += drivers/s390/
@@ -163,6 +161,12 @@ vdso_prepare: prepare0
$(Q)$(MAKE) $(build)=arch/s390/kernel/vdso64 include/generated/vdso64-offsets.h
$(if $(CONFIG_COMPAT),$(Q)$(MAKE) \
$(build)=arch/s390/kernel/vdso32 include/generated/vdso32-offsets.h)
+
+ifdef CONFIG_EXPOLINE_EXTERN
+modules_prepare: expoline_prepare
+expoline_prepare:
+ $(Q)$(MAKE) $(build)=arch/s390/lib/expoline arch/s390/lib/expoline/expoline.o
+endif
endif
# Don't use tabs in echo arguments
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 863e6bcaa5a1..6e7f01ca53e6 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -10,11 +10,14 @@
#include <asm/sclp.h>
#include <asm/diag.h>
#include <asm/uv.h>
+#include <asm/abs_lowcore.h>
#include "decompressor.h"
#include "boot.h"
#include "uv.h"
unsigned long __bootdata_preserved(__kaslr_offset);
+unsigned long __bootdata_preserved(__abs_lowcore);
+unsigned long __bootdata_preserved(__memcpy_real_area);
unsigned long __bootdata(__amode31_base);
unsigned long __bootdata_preserved(VMALLOC_START);
unsigned long __bootdata_preserved(VMALLOC_END);
@@ -152,6 +155,7 @@ static void setup_kernel_memory_layout(void)
unsigned long vmemmap_start;
unsigned long rte_size;
unsigned long pages;
+ unsigned long vmax;
pages = ident_map_size / PAGE_SIZE;
/* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
@@ -163,10 +167,10 @@ static void setup_kernel_memory_layout(void)
vmalloc_size > _REGION2_SIZE ||
vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
_REGION2_SIZE) {
- MODULES_END = _REGION1_SIZE;
+ vmax = _REGION1_SIZE;
rte_size = _REGION2_SIZE;
} else {
- MODULES_END = _REGION2_SIZE;
+ vmax = _REGION2_SIZE;
rte_size = _REGION3_SIZE;
}
/*
@@ -174,11 +178,15 @@ static void setup_kernel_memory_layout(void)
* secure storage limit, so that any vmalloc allocation
* we do could be used to back secure guest storage.
*/
- adjust_to_uv_max(&MODULES_END);
+ vmax = adjust_to_uv_max(vmax);
#ifdef CONFIG_KASAN
/* force vmalloc and modules below kasan shadow */
- MODULES_END = min(MODULES_END, KASAN_SHADOW_START);
+ vmax = min(vmax, KASAN_SHADOW_START);
#endif
+ __memcpy_real_area = round_down(vmax - PAGE_SIZE, PAGE_SIZE);
+ __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE,
+ sizeof(struct lowcore));
+ MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE);
MODULES_VADDR = MODULES_END - MODULES_LEN;
VMALLOC_END = MODULES_VADDR;
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index e6be155ab2e5..0a077c0a2056 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -41,6 +41,12 @@ void uv_query_info(void)
uv_info.max_num_sec_conf = uvcb.max_num_sec_conf;
uv_info.max_guest_cpu_id = uvcb.max_guest_cpu_id;
uv_info.uv_feature_indications = uvcb.uv_feature_indications;
+ uv_info.supp_se_hdr_ver = uvcb.supp_se_hdr_versions;
+ uv_info.supp_se_hdr_pcf = uvcb.supp_se_hdr_pcf;
+ uv_info.conf_dump_storage_state_len = uvcb.conf_dump_storage_state_len;
+ uv_info.conf_dump_finalize_len = uvcb.conf_dump_finalize_len;
+ uv_info.supp_att_req_hdr_ver = uvcb.supp_att_req_hdr_ver;
+ uv_info.supp_att_pflags = uvcb.supp_att_pflags;
}
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
@@ -51,10 +57,11 @@ void uv_query_info(void)
}
#if IS_ENABLED(CONFIG_KVM)
-void adjust_to_uv_max(unsigned long *vmax)
+unsigned long adjust_to_uv_max(unsigned long limit)
{
if (is_prot_virt_host() && uv_info.max_sec_stor_addr)
- *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr);
+ limit = min_t(unsigned long, limit, uv_info.max_sec_stor_addr);
+ return limit;
}
static int is_prot_virt_host_capable(void)
diff --git a/arch/s390/boot/uv.h b/arch/s390/boot/uv.h
index 690ce019af5a..0f3070856f8d 100644
--- a/arch/s390/boot/uv.h
+++ b/arch/s390/boot/uv.h
@@ -3,10 +3,13 @@
#define BOOT_UV_H
#if IS_ENABLED(CONFIG_KVM)
-void adjust_to_uv_max(unsigned long *vmax);
+unsigned long adjust_to_uv_max(unsigned long limit);
void sanitize_prot_virt_host(void);
#else
-static inline void adjust_to_uv_max(unsigned long *vmax) {}
+static inline unsigned long adjust_to_uv_max(unsigned long limit)
+{
+ return limit;
+}
static inline void sanitize_prot_virt_host(void) {}
#endif
diff --git a/arch/s390/boot/version.c b/arch/s390/boot/version.c
index d32e58bdda6a..fd32f038777f 100644
--- a/arch/s390/boot/version.c
+++ b/arch/s390/boot/version.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <generated/utsversion.h>
#include <generated/utsrelease.h>
#include <generated/compile.h>
#include "boot.h"
diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S
index af5c6860e0a1..fa9d33b01b85 100644
--- a/arch/s390/boot/vmlinux.lds.S
+++ b/arch/s390/boot/vmlinux.lds.S
@@ -102,8 +102,17 @@ SECTIONS
_compressed_start = .;
*(.vmlinux.bin.compressed)
_compressed_end = .;
- FILL(0xff);
- . = ALIGN(4096);
+ }
+
+#define SB_TRAILER_SIZE 32
+ /* Trailer needed for Secure Boot */
+ . += SB_TRAILER_SIZE; /* make sure .sb.trailer does not overwrite the previous section */
+ . = ALIGN(4096) - SB_TRAILER_SIZE;
+ .sb.trailer : {
+ QUAD(0)
+ QUAD(0)
+ QUAD(0)
+ QUAD(0x000000207a49504c)
}
_end = .;
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index f6dfde577ce8..2a827002934b 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -40,8 +40,6 @@ CONFIG_CHECKPOINT_RESTORE=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
-CONFIG_USERFAULTFD=y
-# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_LIVEPATCH=y
CONFIG_MARCH_ZEC12=y
@@ -74,6 +72,7 @@ CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_MODULE_SIG_SHA256=y
@@ -93,6 +92,10 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_IOSCHED_BFQ=y
CONFIG_BFQ_GROUP_IOSCHED=y
CONFIG_BINFMT_MISC=m
+CONFIG_ZSWAP=y
+CONFIG_ZSMALLOC_STAT=y
+CONFIG_SLUB_STATS=y
+# CONFIG_COMPAT_BRK is not set
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
@@ -102,14 +105,12 @@ CONFIG_CMA_DEBUGFS=y
CONFIG_CMA_SYSFS=y
CONFIG_CMA_AREAS=7
CONFIG_MEM_SOFT_DIRTY=y
-CONFIG_ZSWAP=y
-CONFIG_ZSMALLOC=y
-CONFIG_ZSMALLOC_STAT=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_PERCPU_STATS=y
CONFIG_GUP_TEST=y
CONFIG_ANON_VMA_NAME=y
+CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -167,6 +168,7 @@ CONFIG_BRIDGE_NETFILTER=m
CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_PROCFS=y
CONFIG_NF_CONNTRACK_EVENTS=y
CONFIG_NF_CONNTRACK_TIMEOUT=y
CONFIG_NF_CONNTRACK_TIMESTAMP=y
@@ -493,7 +495,6 @@ CONFIG_NLMON=m
# CONFIG_NET_VENDOR_ASIX is not set
# CONFIG_NET_VENDOR_ATHEROS is not set
# CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_BROCADE is not set
# CONFIG_NET_VENDOR_CADENCE is not set
# CONFIG_NET_VENDOR_CAVIUM is not set
# CONFIG_NET_VENDOR_CHELSIO is not set
@@ -509,7 +510,7 @@ CONFIG_NLMON=m
# CONFIG_NET_VENDOR_GOOGLE is not set
# CONFIG_NET_VENDOR_HUAWEI is not set
# CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_MICROSOFT is not set
+# CONFIG_NET_VENDOR_WANGXUN is not set
# CONFIG_NET_VENDOR_LITEX is not set
# CONFIG_NET_VENDOR_MARVELL is not set
CONFIG_MLX4_EN=m
@@ -518,16 +519,18 @@ CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
+# CONFIG_NET_VENDOR_MICROSOFT is not set
# CONFIG_NET_VENDOR_MYRI is not set
+# CONFIG_NET_VENDOR_NI is not set
# CONFIG_NET_VENDOR_NATSEMI is not set
# CONFIG_NET_VENDOR_NETERION is not set
# CONFIG_NET_VENDOR_NETRONOME is not set
-# CONFIG_NET_VENDOR_NI is not set
# CONFIG_NET_VENDOR_NVIDIA is not set
# CONFIG_NET_VENDOR_OKI is not set
# CONFIG_NET_VENDOR_PACKET_ENGINES is not set
# CONFIG_NET_VENDOR_PENSANDO is not set
# CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
# CONFIG_NET_VENDOR_QUALCOMM is not set
# CONFIG_NET_VENDOR_RDC is not set
# CONFIG_NET_VENDOR_REALTEK is not set
@@ -535,9 +538,9 @@ CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_ROCKER is not set
# CONFIG_NET_VENDOR_SAMSUNG is not set
# CONFIG_NET_VENDOR_SEEQ is not set
-# CONFIG_NET_VENDOR_SOLARFLARE is not set
# CONFIG_NET_VENDOR_SILAN is not set
# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SOLARFLARE is not set
# CONFIG_NET_VENDOR_SMSC is not set
# CONFIG_NET_VENDOR_SOCIONEXT is not set
# CONFIG_NET_VENDOR_STMICRO is not set
@@ -570,6 +573,8 @@ CONFIG_VIRTIO_CONSOLE=m
CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_HANGCHECK_TIMER=m
CONFIG_TN3270_FS=y
+# CONFIG_RANDOM_TRUST_CPU is not set
+# CONFIG_RANDOM_TRUST_BOOTLOADER is not set
CONFIG_PPS=m
# CONFIG_PTP_1588_CLOCK is not set
# CONFIG_HWMON is not set
@@ -727,18 +732,26 @@ CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
+CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_BLAKE2S=m
+CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
+CONFIG_CRYPTO_SHA3_256_S390=m
+CONFIG_CRYPTO_SHA3_512_S390=m
+CONFIG_CRYPTO_SM3_GENERIC=m
CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_AES_TI=m
+CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_ANUBIS=m
CONFIG_CRYPTO_ARC4=m
CONFIG_CRYPTO_BLOWFISH=m
@@ -746,11 +759,14 @@ CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
+CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_CHACHA_S390=m
CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_ARIA=m
CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_842=m
@@ -766,16 +782,6 @@ CONFIG_CRYPTO_STATS=y
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
-CONFIG_CRYPTO_SHA1_S390=m
-CONFIG_CRYPTO_SHA256_S390=m
-CONFIG_CRYPTO_SHA512_S390=m
-CONFIG_CRYPTO_SHA3_256_S390=m
-CONFIG_CRYPTO_SHA3_512_S390=m
-CONFIG_CRYPTO_DES_S390=m
-CONFIG_CRYPTO_AES_S390=m
-CONFIG_CRYPTO_CHACHA_S390=m
-CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_CORDIC=m
CONFIG_CRYPTO_LIB_CURVE25519=m
@@ -797,6 +803,7 @@ CONFIG_HEADERS_INSTALL=y
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_SLUB_DEBUG_ON=y
CONFIG_PAGE_OWNER=y
CONFIG_DEBUG_RODATA_TEST=y
CONFIG_DEBUG_WX=y
@@ -808,8 +815,6 @@ CONFIG_DEBUG_OBJECTS_TIMERS=y
CONFIG_DEBUG_OBJECTS_WORK=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
-CONFIG_SLUB_DEBUG_ON=y
-CONFIG_SLUB_STATS=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_VM=y
CONFIG_DEBUG_VM_PGFLAGS=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 706df3a4a867..fb780e80e4c8 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -38,8 +38,6 @@ CONFIG_CHECKPOINT_RESTORE=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
-CONFIG_USERFAULTFD=y
-# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_LIVEPATCH=y
CONFIG_MARCH_ZEC12=y
@@ -69,6 +67,7 @@ CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_MODULE_SIG_SHA256=y
@@ -88,6 +87,9 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_IOSCHED_BFQ=y
CONFIG_BFQ_GROUP_IOSCHED=y
CONFIG_BINFMT_MISC=m
+CONFIG_ZSWAP=y
+CONFIG_ZSMALLOC_STAT=y
+# CONFIG_COMPAT_BRK is not set
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
@@ -95,13 +97,11 @@ CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CMA_SYSFS=y
CONFIG_CMA_AREAS=7
CONFIG_MEM_SOFT_DIRTY=y
-CONFIG_ZSWAP=y
-CONFIG_ZSMALLOC=y
-CONFIG_ZSMALLOC_STAT=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_PERCPU_STATS=y
CONFIG_ANON_VMA_NAME=y
+CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -159,6 +159,7 @@ CONFIG_BRIDGE_NETFILTER=m
CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_PROCFS=y
CONFIG_NF_CONNTRACK_EVENTS=y
CONFIG_NF_CONNTRACK_TIMEOUT=y
CONFIG_NF_CONNTRACK_TIMESTAMP=y
@@ -484,7 +485,6 @@ CONFIG_NLMON=m
# CONFIG_NET_VENDOR_ASIX is not set
# CONFIG_NET_VENDOR_ATHEROS is not set
# CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_BROCADE is not set
# CONFIG_NET_VENDOR_CADENCE is not set
# CONFIG_NET_VENDOR_CAVIUM is not set
# CONFIG_NET_VENDOR_CHELSIO is not set
@@ -500,7 +500,7 @@ CONFIG_NLMON=m
# CONFIG_NET_VENDOR_GOOGLE is not set
# CONFIG_NET_VENDOR_HUAWEI is not set
# CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_MICROSOFT is not set
+# CONFIG_NET_VENDOR_WANGXUN is not set
# CONFIG_NET_VENDOR_LITEX is not set
# CONFIG_NET_VENDOR_MARVELL is not set
CONFIG_MLX4_EN=m
@@ -509,16 +509,18 @@ CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
+# CONFIG_NET_VENDOR_MICROSOFT is not set
# CONFIG_NET_VENDOR_MYRI is not set
+# CONFIG_NET_VENDOR_NI is not set
# CONFIG_NET_VENDOR_NATSEMI is not set
# CONFIG_NET_VENDOR_NETERION is not set
# CONFIG_NET_VENDOR_NETRONOME is not set
-# CONFIG_NET_VENDOR_NI is not set
# CONFIG_NET_VENDOR_NVIDIA is not set
# CONFIG_NET_VENDOR_OKI is not set
# CONFIG_NET_VENDOR_PACKET_ENGINES is not set
# CONFIG_NET_VENDOR_PENSANDO is not set
# CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
# CONFIG_NET_VENDOR_QUALCOMM is not set
# CONFIG_NET_VENDOR_RDC is not set
# CONFIG_NET_VENDOR_REALTEK is not set
@@ -526,9 +528,9 @@ CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_ROCKER is not set
# CONFIG_NET_VENDOR_SAMSUNG is not set
# CONFIG_NET_VENDOR_SEEQ is not set
-# CONFIG_NET_VENDOR_SOLARFLARE is not set
# CONFIG_NET_VENDOR_SILAN is not set
# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SOLARFLARE is not set
# CONFIG_NET_VENDOR_SMSC is not set
# CONFIG_NET_VENDOR_SOCIONEXT is not set
# CONFIG_NET_VENDOR_STMICRO is not set
@@ -561,6 +563,8 @@ CONFIG_VIRTIO_CONSOLE=m
CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_HANGCHECK_TIMER=m
CONFIG_TN3270_FS=y
+# CONFIG_RANDOM_TRUST_CPU is not set
+# CONFIG_RANDOM_TRUST_BOOTLOADER is not set
# CONFIG_PTP_1588_CLOCK is not set
# CONFIG_HWMON is not set
CONFIG_WATCHDOG=y
@@ -713,18 +717,26 @@ CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
+CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_BLAKE2S=m
+CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
+CONFIG_CRYPTO_SHA3_256_S390=m
+CONFIG_CRYPTO_SHA3_512_S390=m
+CONFIG_CRYPTO_SM3_GENERIC=m
CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_AES_TI=m
+CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_ANUBIS=m
CONFIG_CRYPTO_ARC4=m
CONFIG_CRYPTO_BLOWFISH=m
@@ -732,11 +744,14 @@ CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
+CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_CHACHA_S390=m
CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_ARIA=m
CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_842=m
@@ -752,16 +767,6 @@ CONFIG_CRYPTO_STATS=y
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
-CONFIG_CRYPTO_SHA1_S390=m
-CONFIG_CRYPTO_SHA256_S390=m
-CONFIG_CRYPTO_SHA512_S390=m
-CONFIG_CRYPTO_SHA3_256_S390=m
-CONFIG_CRYPTO_SHA3_512_S390=m
-CONFIG_CRYPTO_DES_S390=m
-CONFIG_CRYPTO_AES_S390=m
-CONFIG_CRYPTO_CHACHA_S390=m
-CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_CORDIC=m
CONFIG_PRIME_NUMBERS=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index a87fcc45e307..a5576b8d4081 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -1,4 +1,3 @@
-# CONFIG_SWAP is not set
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_BPF_SYSCALL=y
@@ -9,13 +8,11 @@ CONFIG_BPF_SYSCALL=y
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-# CONFIG_COMPAT_BRK is not set
CONFIG_MARCH_ZEC12=y
CONFIG_TUNE_ZEC12=y
# CONFIG_COMPAT is not set
CONFIG_NR_CPUS=2
CONFIG_HZ_100=y
-# CONFIG_ARCH_RANDOM is not set
# CONFIG_RELOCATABLE is not set
# CONFIG_CHSC_SCH is not set
# CONFIG_SCM_BUS is not set
@@ -29,6 +26,8 @@ CONFIG_CRASH_DUMP=y
# CONFIG_BLOCK_LEGACY_AUTOLOAD is not set
CONFIG_PARTITION_ADVANCED=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_SWAP is not set
+# CONFIG_COMPAT_BRK is not set
# CONFIG_COMPACTION is not set
# CONFIG_MIGRATION is not set
CONFIG_NET=y
@@ -54,10 +53,12 @@ CONFIG_ZFCP=y
# CONFIG_HVC_IUCV is not set
# CONFIG_HW_RANDOM_S390 is not set
# CONFIG_HMC_DRV is not set
+# CONFIG_S390_UV_UAPI is not set
# CONFIG_S390_TAPE is not set
# CONFIG_VMCP is not set
# CONFIG_MONWRITER is not set
# CONFIG_S390_VMUR is not set
+# CONFIG_RANDOM_TRUST_BOOTLOADER is not set
# CONFIG_HID is not set
# CONFIG_VIRTIO_MENU is not set
# CONFIG_VHOST_MENU is not set
diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig
new file mode 100644
index 000000000000..06ee706b0d78
--- /dev/null
+++ b/arch/s390/crypto/Kconfig
@@ -0,0 +1,135 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "Accelerated Cryptographic Algorithms for CPU (s390)"
+
+config CRYPTO_CRC32_S390
+ tristate "CRC32c and CRC32"
+ depends on S390
+ select CRYPTO_HASH
+ select CRC32
+ help
+ CRC32c and CRC32 CRC algorithms
+
+ Architecture: s390
+
+ It is available with IBM z13 or later.
+
+config CRYPTO_SHA512_S390
+ tristate "Hash functions: SHA-384 and SHA-512"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ SHA-384 and SHA-512 secure hash algorithms (FIPS 180)
+
+ Architecture: s390
+
+ It is available as of z10.
+
+config CRYPTO_SHA1_S390
+ tristate "Hash functions: SHA-1"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ SHA-1 secure hash algorithm (FIPS 180)
+
+ Architecture: s390
+
+ It is available as of z990.
+
+config CRYPTO_SHA256_S390
+ tristate "Hash functions: SHA-224 and SHA-256"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
+
+ Architecture: s390
+
+ It is available as of z9.
+
+config CRYPTO_SHA3_256_S390
+ tristate "Hash functions: SHA3-224 and SHA3-256"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ SHA3-224 and SHA3-256 secure hash algorithms (FIPS 202)
+
+ Architecture: s390
+
+ It is available as of z14.
+
+config CRYPTO_SHA3_512_S390
+ tristate "Hash functions: SHA3-384 and SHA3-512"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ SHA3-384 and SHA3-512 secure hash algorithms (FIPS 202)
+
+ Architecture: s390
+
+ It is available as of z14.
+
+config CRYPTO_GHASH_S390
+ tristate "Hash functions: GHASH"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ GCM GHASH hash function (NIST SP800-38D)
+
+ Architecture: s390
+
+ It is available as of z196.
+
+config CRYPTO_AES_S390
+ tristate "Ciphers: AES, modes: ECB, CBC, CTR, XTS, GCM"
+ depends on S390
+ select CRYPTO_ALGAPI
+ select CRYPTO_SKCIPHER
+ help
+ Block cipher: AES cipher algorithms (FIPS 197)
+ AEAD cipher: AES with GCM
+ Length-preserving ciphers: AES with ECB, CBC, XTS, and CTR modes
+
+ Architecture: s390
+
+ As of z9 the ECB and CBC modes are hardware accelerated
+ for 128 bit keys.
+
+ As of z10 the ECB and CBC modes are hardware accelerated
+ for all AES key sizes.
+
+ As of z196 the CTR mode is hardware accelerated for all AES
+ key sizes and XTS mode is hardware accelerated for 256 and
+ 512 bit keys.
+
+config CRYPTO_DES_S390
+ tristate "Ciphers: DES and Triple DES EDE, modes: ECB, CBC, CTR"
+ depends on S390
+ select CRYPTO_ALGAPI
+ select CRYPTO_SKCIPHER
+ select CRYPTO_LIB_DES
+ help
+ Block ciphers: DES (FIPS 46-2) cipher algorithm
+ Block ciphers: Triple DES EDE (FIPS 46-3) cipher algorithm
+ Length-preserving ciphers: DES with ECB, CBC, and CTR modes
+ Length-preserving ciphers: Triple DES EDE with ECB, CBC, and CTR modes
+
+ Architecture: s390
+
+ As of z990 the ECB and CBC modes are hardware accelerated.
+ As of z196 the CTR mode is hardware accelerated.
+
+config CRYPTO_CHACHA_S390
+ tristate "Ciphers: ChaCha20"
+ depends on S390
+ select CRYPTO_SKCIPHER
+ select CRYPTO_LIB_CHACHA_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+ help
+ Length-preserving cipher: ChaCha20 stream cipher (RFC 7539)
+
+ Architecture: s390
+
+ It is available as of z13.
+
+endmenu
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index c63abfeb6d17..1b1cc478fa94 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -15,7 +15,7 @@ obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o
obj-$(CONFIG_S390_PRNG) += prng.o
obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o
-obj-$(CONFIG_ARCH_RANDOM) += arch_random.o
+obj-y += arch_random.o
crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o
chacha_s390-y := chacha-glue.o chacha-s390.o
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 1023e9d43d44..526c3f40f6a2 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -1049,7 +1049,7 @@ out_err:
return ret;
}
-module_cpu_feature_match(MSA, aes_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, aes_s390_init);
module_exit(aes_s390_fini);
MODULE_ALIAS_CRYPTO("aes-all");
diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c
index 56007c763902..1f2d40993c4d 100644
--- a/arch/s390/crypto/arch_random.c
+++ b/arch/s390/crypto/arch_random.c
@@ -4,232 +4,15 @@
*
* Copyright IBM Corp. 2017, 2020
* Author(s): Harald Freudenberger
- *
- * The s390_arch_random_generate() function may be called from random.c
- * in interrupt context. So this implementation does the best to be very
- * fast. There is a buffer of random data which is asynchronously checked
- * and filled by a workqueue thread.
- * If there are enough bytes in the buffer the s390_arch_random_generate()
- * just delivers these bytes. Otherwise false is returned until the
- * worker thread refills the buffer.
- * The worker fills the rng buffer by pulling fresh entropy from the
- * high quality (but slow) true hardware random generator. This entropy
- * is then spread over the buffer with an pseudo random generator PRNG.
- * As the arch_get_random_seed_long() fetches 8 bytes and the calling
- * function add_interrupt_randomness() counts this as 1 bit entropy the
- * distribution needs to make sure there is in fact 1 bit entropy contained
- * in 8 bytes of the buffer. The current values pull 32 byte entropy
- * and scatter this into a 2048 byte buffer. So 8 byte in the buffer
- * will contain 1 bit of entropy.
- * The worker thread is rescheduled based on the charge level of the
- * buffer but at least with 500 ms delay to avoid too much CPU consumption.
- * So the max. amount of rng data delivered via arch_get_random_seed is
- * limited to 4k bytes per second.
*/
#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/random.h>
-#include <linux/slab.h>
#include <linux/static_key.h>
-#include <linux/workqueue.h>
-#include <linux/moduleparam.h>
#include <asm/cpacf.h>
DEFINE_STATIC_KEY_FALSE(s390_arch_random_available);
atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0);
EXPORT_SYMBOL(s390_arch_random_counter);
-
-#define ARCH_REFILL_TICKS (HZ/2)
-#define ARCH_PRNG_SEED_SIZE 32
-#define ARCH_RNG_BUF_SIZE 2048
-
-static DEFINE_SPINLOCK(arch_rng_lock);
-static u8 *arch_rng_buf;
-static unsigned int arch_rng_buf_idx;
-
-static void arch_rng_refill_buffer(struct work_struct *);
-static DECLARE_DELAYED_WORK(arch_rng_work, arch_rng_refill_buffer);
-
-bool s390_arch_random_generate(u8 *buf, unsigned int nbytes)
-{
- /* max hunk is ARCH_RNG_BUF_SIZE */
- if (nbytes > ARCH_RNG_BUF_SIZE)
- return false;
-
- /* lock rng buffer */
- if (!spin_trylock(&arch_rng_lock))
- return false;
-
- /* try to resolve the requested amount of bytes from the buffer */
- arch_rng_buf_idx -= nbytes;
- if (arch_rng_buf_idx < ARCH_RNG_BUF_SIZE) {
- memcpy(buf, arch_rng_buf + arch_rng_buf_idx, nbytes);
- atomic64_add(nbytes, &s390_arch_random_counter);
- spin_unlock(&arch_rng_lock);
- return true;
- }
-
- /* not enough bytes in rng buffer, refill is done asynchronously */
- spin_unlock(&arch_rng_lock);
-
- return false;
-}
-EXPORT_SYMBOL(s390_arch_random_generate);
-
-static void arch_rng_refill_buffer(struct work_struct *unused)
-{
- unsigned int delay = ARCH_REFILL_TICKS;
-
- spin_lock(&arch_rng_lock);
- if (arch_rng_buf_idx > ARCH_RNG_BUF_SIZE) {
- /* buffer is exhausted and needs refill */
- u8 seed[ARCH_PRNG_SEED_SIZE];
- u8 prng_wa[240];
- /* fetch ARCH_PRNG_SEED_SIZE bytes of entropy */
- cpacf_trng(NULL, 0, seed, sizeof(seed));
- /* blow this entropy up to ARCH_RNG_BUF_SIZE with PRNG */
- memset(prng_wa, 0, sizeof(prng_wa));
- cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
- &prng_wa, NULL, 0, seed, sizeof(seed));
- cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
- &prng_wa, arch_rng_buf, ARCH_RNG_BUF_SIZE, NULL, 0);
- arch_rng_buf_idx = ARCH_RNG_BUF_SIZE;
- }
- delay += (ARCH_REFILL_TICKS * arch_rng_buf_idx) / ARCH_RNG_BUF_SIZE;
- spin_unlock(&arch_rng_lock);
-
- /* kick next check */
- queue_delayed_work(system_long_wq, &arch_rng_work, delay);
-}
-
-/*
- * Here follows the implementation of s390_arch_get_random_long().
- *
- * The random longs to be pulled by arch_get_random_long() are
- * prepared in an 4K buffer which is filled from the NIST 800-90
- * compliant s390 drbg. By default the random long buffer is refilled
- * 256 times before the drbg itself needs a reseed. The reseed of the
- * drbg is done with 32 bytes fetched from the high quality (but slow)
- * trng which is assumed to deliver 100% entropy. So the 32 * 8 = 256
- * bits of entropy are spread over 256 * 4KB = 1MB serving 131072
- * arch_get_random_long() invocations before reseeded.
- *
- * How often the 4K random long buffer is refilled with the drbg
- * before the drbg is reseeded can be adjusted. There is a module
- * parameter 's390_arch_rnd_long_drbg_reseed' accessible via
- * /sys/module/arch_random/parameters/rndlong_drbg_reseed
- * or as kernel command line parameter
- * arch_random.rndlong_drbg_reseed=<value>
- * This parameter tells how often the drbg fills the 4K buffer before
- * it is re-seeded by fresh entropy from the trng.
- * A value of 16 results in reseeding the drbg at every 16 * 4 KB = 64
- * KB with 32 bytes of fresh entropy pulled from the trng. So a value
- * of 16 would result in 256 bits entropy per 64 KB.
- * A value of 256 results in 1MB of drbg output before a reseed of the
- * drbg is done. So this would spread the 256 bits of entropy among 1MB.
- * Setting this parameter to 0 forces the reseed to take place every
- * time the 4K buffer is depleted, so the entropy rises to 256 bits
- * entropy per 4K or 0.5 bit entropy per arch_get_random_long(). With
- * setting this parameter to negative values all this effort is
- * disabled, arch_get_random long() returns false and thus indicating
- * that the arch_get_random_long() feature is disabled at all.
- */
-
-static unsigned long rndlong_buf[512];
-static DEFINE_SPINLOCK(rndlong_lock);
-static int rndlong_buf_index;
-
-static int rndlong_drbg_reseed = 256;
-module_param_named(rndlong_drbg_reseed, rndlong_drbg_reseed, int, 0600);
-MODULE_PARM_DESC(rndlong_drbg_reseed, "s390 arch_get_random_long() drbg reseed");
-
-static inline void refill_rndlong_buf(void)
-{
- static u8 prng_ws[240];
- static int drbg_counter;
-
- if (--drbg_counter < 0) {
- /* need to re-seed the drbg */
- u8 seed[32];
-
- /* fetch seed from trng */
- cpacf_trng(NULL, 0, seed, sizeof(seed));
- /* seed drbg */
- memset(prng_ws, 0, sizeof(prng_ws));
- cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
- &prng_ws, NULL, 0, seed, sizeof(seed));
- /* re-init counter for drbg */
- drbg_counter = rndlong_drbg_reseed;
- }
-
- /* fill the arch_get_random_long buffer from drbg */
- cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &prng_ws,
- (u8 *) rndlong_buf, sizeof(rndlong_buf),
- NULL, 0);
-}
-
-bool s390_arch_get_random_long(unsigned long *v)
-{
- bool rc = false;
- unsigned long flags;
-
- /* arch_get_random_long() disabled ? */
- if (rndlong_drbg_reseed < 0)
- return false;
-
- /* try to lock the random long lock */
- if (!spin_trylock_irqsave(&rndlong_lock, flags))
- return false;
-
- if (--rndlong_buf_index >= 0) {
- /* deliver next long value from the buffer */
- *v = rndlong_buf[rndlong_buf_index];
- rc = true;
- goto out;
- }
-
- /* buffer is depleted and needs refill */
- if (in_interrupt()) {
- /* delay refill in interrupt context to next caller */
- rndlong_buf_index = 0;
- goto out;
- }
-
- /* refill random long buffer */
- refill_rndlong_buf();
- rndlong_buf_index = ARRAY_SIZE(rndlong_buf);
-
- /* and provide one random long */
- *v = rndlong_buf[--rndlong_buf_index];
- rc = true;
-
-out:
- spin_unlock_irqrestore(&rndlong_lock, flags);
- return rc;
-}
-EXPORT_SYMBOL(s390_arch_get_random_long);
-
-static int __init s390_arch_random_init(void)
-{
- /* all the needed PRNO subfunctions available ? */
- if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG) &&
- cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) {
-
- /* alloc arch random working buffer */
- arch_rng_buf = kmalloc(ARCH_RNG_BUF_SIZE, GFP_KERNEL);
- if (!arch_rng_buf)
- return -ENOMEM;
-
- /* kick worker queue job to fill the random buffer */
- queue_delayed_work(system_long_wq,
- &arch_rng_work, ARCH_REFILL_TICKS);
-
- /* enable arch random to the outside world */
- static_branch_enable(&s390_arch_random_available);
- }
-
- return 0;
-}
-arch_initcall(s390_arch_random_init);
diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c
index 2ec51f339cec..7752bd314558 100644
--- a/arch/s390/crypto/chacha-glue.c
+++ b/arch/s390/crypto/chacha-glue.c
@@ -121,7 +121,7 @@ static void __exit chacha_mod_fini(void)
crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
}
-module_cpu_feature_match(VXRS, chacha_mod_init);
+module_cpu_feature_match(S390_CPU_FEATURE_VXRS, chacha_mod_init);
module_exit(chacha_mod_fini);
MODULE_DESCRIPTION("ChaCha20 stream cipher");
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index fafecad20752..017143e9cef7 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -298,7 +298,7 @@ static void __exit crc_vx_mod_exit(void)
crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs));
}
-module_cpu_feature_match(VXRS, crc_vx_mod_init);
+module_cpu_feature_match(S390_CPU_FEATURE_VXRS, crc_vx_mod_init);
module_exit(crc_vx_mod_exit);
MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index e013088b5115..8e75b83a5ddc 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -492,7 +492,7 @@ out_err:
return ret;
}
-module_cpu_feature_match(MSA, des_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, des_s390_init);
module_exit(des_s390_exit);
MODULE_ALIAS_CRYPTO("des");
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index 6b07a2f1ce8a..0800a2a5799f 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -145,7 +145,7 @@ static void __exit ghash_mod_exit(void)
crypto_unregister_shash(&ghash_alg);
}
-module_cpu_feature_match(MSA, ghash_mod_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, ghash_mod_init);
module_exit(ghash_mod_exit);
MODULE_ALIAS_CRYPTO("ghash");
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index ae382bafc772..a077087bc6cc 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -907,5 +907,5 @@ static void __exit prng_exit(void)
}
}
-module_cpu_feature_match(MSA, prng_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, prng_init);
module_exit(prng_exit);
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index a3fabf310a38..bc3a22704e09 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -95,7 +95,7 @@ static void __exit sha1_s390_fini(void)
crypto_unregister_shash(&alg);
}
-module_cpu_feature_match(MSA, sha1_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha1_s390_init);
module_exit(sha1_s390_fini);
MODULE_ALIAS_CRYPTO("sha1");
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index 24983f175676..6f1ccdf93d3e 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -134,7 +134,7 @@ static void __exit sha256_s390_fini(void)
crypto_unregister_shash(&sha256_alg);
}
-module_cpu_feature_match(MSA, sha256_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha256_s390_init);
module_exit(sha256_s390_fini);
MODULE_ALIAS_CRYPTO("sha256");
diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c
index 30ac49b635bf..e1350e033a32 100644
--- a/arch/s390/crypto/sha3_256_s390.c
+++ b/arch/s390/crypto/sha3_256_s390.c
@@ -137,7 +137,7 @@ static void __exit sha3_256_s390_fini(void)
crypto_unregister_shash(&sha3_256_alg);
}
-module_cpu_feature_match(MSA, sha3_256_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha3_256_s390_init);
module_exit(sha3_256_s390_fini);
MODULE_ALIAS_CRYPTO("sha3-256");
diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c
index e70d50f7620f..06c142ed9bb1 100644
--- a/arch/s390/crypto/sha3_512_s390.c
+++ b/arch/s390/crypto/sha3_512_s390.c
@@ -147,7 +147,7 @@ static void __exit fini(void)
crypto_unregister_shash(&sha3_384_alg);
}
-module_cpu_feature_match(MSA, init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, init);
module_exit(fini);
MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 43ce4956df73..04f11c407763 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -142,7 +142,7 @@ static void __exit fini(void)
crypto_unregister_shash(&sha384_alg);
}
-module_cpu_feature_match(MSA, init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, init);
module_exit(fini);
MODULE_LICENSE("GPL");
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index f0bc4dc3e9bf..6511d15ace45 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -437,7 +437,7 @@ __init int hypfs_diag_init(void)
int rc;
if (diag204_probe()) {
- pr_err("The hardware system does not support hypfs\n");
+ pr_info("The hardware system does not support hypfs\n");
return -ENODATA;
}
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 5c97f48cea91..ee919bfc8186 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -496,9 +496,9 @@ fail_hypfs_sprp_exit:
hypfs_vm_exit();
fail_hypfs_diag_exit:
hypfs_diag_exit();
+ pr_err("Initialization of hypfs failed with rc=%i\n", rc);
fail_dbfs_exit:
hypfs_dbfs_exit();
- pr_err("Initialization of hypfs failed with rc=%i\n", rc);
return rc;
}
device_initcall(hypfs_init)
diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h
new file mode 100644
index 000000000000..4c61b14ee928
--- /dev/null
+++ b/arch/s390/include/asm/abs_lowcore.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ABS_LOWCORE_H
+#define _ASM_S390_ABS_LOWCORE_H
+
+#include <asm/lowcore.h>
+
+#define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore))
+
+extern unsigned long __abs_lowcore;
+extern bool abs_lowcore_mapped;
+
+struct lowcore *get_abs_lowcore(unsigned long *flags);
+void put_abs_lowcore(struct lowcore *lc, unsigned long flags);
+int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc);
+void abs_lowcore_unmap(int cpu);
+
+#endif /* _ASM_S390_ABS_LOWCORE_H */
diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
index 01936fdfaddb..e82e5626e139 100644
--- a/arch/s390/include/asm/airq.h
+++ b/arch/s390/include/asm/airq.h
@@ -12,10 +12,11 @@
#include <linux/bit_spinlock.h>
#include <linux/dma-mapping.h>
+#include <asm/tpi.h>
struct airq_struct {
struct hlist_node list; /* Handler queueing. */
- void (*handler)(struct airq_struct *airq, bool floating);
+ void (*handler)(struct airq_struct *airq, struct tpi_info *tpi_info);
u8 *lsi_ptr; /* Local-Summary-Indicator pointer */
u8 lsi_mask; /* Local-Summary-Indicator mask */
u8 isc; /* Interrupt-subclass */
@@ -46,8 +47,10 @@ struct airq_iv {
#define AIRQ_IV_PTR 4 /* Allocate the ptr array */
#define AIRQ_IV_DATA 8 /* Allocate the data array */
#define AIRQ_IV_CACHELINE 16 /* Cacheline alignment for the vector */
+#define AIRQ_IV_GUESTVEC 32 /* Vector is a pinned guest page */
-struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags);
+struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags,
+ unsigned long *vec);
void airq_iv_release(struct airq_iv *iv);
unsigned long airq_iv_alloc(struct airq_iv *iv, unsigned long num);
void airq_iv_free(struct airq_iv *iv, unsigned long bit, unsigned long num);
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index b515cfa62bd9..f508f5025e38 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -227,13 +227,13 @@ struct ap_qirq_ctrl {
* ap_aqic(): Control interruption for a specific AP.
* @qid: The AP queue number
* @qirqctrl: struct ap_qirq_ctrl (64 bit value)
- * @ind: The notification indicator byte
+ * @pa_ind: Physical address of the notification indicator byte
*
* Returns AP queue status.
*/
static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
struct ap_qirq_ctrl qirqctrl,
- void *ind)
+ phys_addr_t pa_ind)
{
unsigned long reg0 = qid | (3UL << 24); /* fc 3UL is AQIC */
union {
@@ -241,7 +241,7 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
struct ap_qirq_ctrl qirqctrl;
struct ap_queue_status status;
} reg1;
- unsigned long reg2 = virt_to_phys(ind);
+ unsigned long reg2 = pa_ind;
reg1.qirqctrl = qirqctrl;
diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
index 5dc712fde3c7..1594049893e0 100644
--- a/arch/s390/include/asm/archrandom.h
+++ b/arch/s390/include/asm/archrandom.h
@@ -2,7 +2,7 @@
/*
* Kernel interface for the s390 arch_random_* functions
*
- * Copyright IBM Corp. 2017, 2020
+ * Copyright IBM Corp. 2017, 2022
*
* Author: Harald Freudenberger <freude@de.ibm.com>
*
@@ -11,44 +11,28 @@
#ifndef _ASM_S390_ARCHRANDOM_H
#define _ASM_S390_ARCHRANDOM_H
-#ifdef CONFIG_ARCH_RANDOM
-
#include <linux/static_key.h>
+#include <linux/preempt.h>
#include <linux/atomic.h>
+#include <asm/cpacf.h>
DECLARE_STATIC_KEY_FALSE(s390_arch_random_available);
extern atomic64_t s390_arch_random_counter;
-bool s390_arch_get_random_long(unsigned long *v);
-bool s390_arch_random_generate(u8 *buf, unsigned int nbytes);
-
-static inline bool __must_check arch_get_random_long(unsigned long *v)
-{
- if (static_branch_likely(&s390_arch_random_available))
- return s390_arch_get_random_long(v);
- return false;
-}
-
-static inline bool __must_check arch_get_random_int(unsigned int *v)
+static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs)
{
- return false;
-}
-
-static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
-{
- if (static_branch_likely(&s390_arch_random_available)) {
- return s390_arch_random_generate((u8 *)v, sizeof(*v));
- }
- return false;
+ return 0;
}
-static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
+static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs)
{
- if (static_branch_likely(&s390_arch_random_available)) {
- return s390_arch_random_generate((u8 *)v, sizeof(*v));
+ if (static_branch_likely(&s390_arch_random_available) &&
+ in_task()) {
+ cpacf_trng(NULL, 0, (u8 *)v, max_longs * sizeof(*v));
+ atomic64_add(max_longs * sizeof(*v), &s390_arch_random_counter);
+ return max_longs;
}
- return false;
+ return 0;
}
-#endif /* CONFIG_ARCH_RANDOM */
#endif /* _ASM_S390_ARCHRANDOM_H */
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 191dc7898b0f..2de74fcd0578 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -113,76 +113,71 @@ static inline bool arch_test_and_change_bit(unsigned long nr,
return old & mask;
}
-static inline void arch___set_bit(unsigned long nr, volatile unsigned long *ptr)
+static __always_inline void
+arch___set_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long *p = __bitops_word(nr, addr);
unsigned long mask = __bitops_mask(nr);
- *addr |= mask;
+ *p |= mask;
}
-static inline void arch___clear_bit(unsigned long nr,
- volatile unsigned long *ptr)
+static __always_inline void
+arch___clear_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long *p = __bitops_word(nr, addr);
unsigned long mask = __bitops_mask(nr);
- *addr &= ~mask;
+ *p &= ~mask;
}
-static inline void arch___change_bit(unsigned long nr,
- volatile unsigned long *ptr)
+static __always_inline void
+arch___change_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long *p = __bitops_word(nr, addr);
unsigned long mask = __bitops_mask(nr);
- *addr ^= mask;
+ *p ^= mask;
}
-static inline bool arch___test_and_set_bit(unsigned long nr,
- volatile unsigned long *ptr)
+static __always_inline bool
+arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long *p = __bitops_word(nr, addr);
unsigned long mask = __bitops_mask(nr);
unsigned long old;
- old = *addr;
- *addr |= mask;
+ old = *p;
+ *p |= mask;
return old & mask;
}
-static inline bool arch___test_and_clear_bit(unsigned long nr,
- volatile unsigned long *ptr)
+static __always_inline bool
+arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long *p = __bitops_word(nr, addr);
unsigned long mask = __bitops_mask(nr);
unsigned long old;
- old = *addr;
- *addr &= ~mask;
+ old = *p;
+ *p &= ~mask;
return old & mask;
}
-static inline bool arch___test_and_change_bit(unsigned long nr,
- volatile unsigned long *ptr)
+static __always_inline bool
+arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long *p = __bitops_word(nr, addr);
unsigned long mask = __bitops_mask(nr);
unsigned long old;
- old = *addr;
- *addr ^= mask;
+ old = *p;
+ *p ^= mask;
return old & mask;
}
-static inline bool arch_test_bit(unsigned long nr,
- const volatile unsigned long *ptr)
-{
- const volatile unsigned long *addr = __bitops_word(nr, ptr);
- unsigned long mask = __bitops_mask(nr);
-
- return *addr & mask;
-}
+#define arch_test_bit generic_test_bit
+#define arch_test_bit_acquire generic_test_bit_acquire
static inline bool arch_test_and_set_bit_lock(unsigned long nr,
volatile unsigned long *ptr)
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index d4e90f2ba77e..bd1596810cc1 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -214,7 +214,6 @@ extern struct ccw_device *ccw_device_create_console(struct ccw_driver *);
extern void ccw_device_destroy_console(struct ccw_device *);
extern int ccw_device_enable_console(struct ccw_device *);
extern void ccw_device_wait_idle(struct ccw_device *);
-extern int ccw_device_force_console(struct ccw_device *);
extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size);
extern void ccw_device_dma_free(struct ccw_device *cdev,
diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h
index 14cfd48d598e..931204613753 100644
--- a/arch/s390/include/asm/cpufeature.h
+++ b/arch/s390/include/asm/cpufeature.h
@@ -2,28 +2,21 @@
/*
* Module interface for CPU features
*
- * Copyright IBM Corp. 2015
+ * Copyright IBM Corp. 2015, 2022
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
*/
#ifndef __ASM_S390_CPUFEATURE_H
#define __ASM_S390_CPUFEATURE_H
-#include <asm/elf.h>
+enum {
+ S390_CPU_FEATURE_MSA,
+ S390_CPU_FEATURE_VXRS,
+ S390_CPU_FEATURE_UV,
+ MAX_CPU_FEATURES
+};
-/* Hardware features on Linux on z Systems are indicated by facility bits that
- * are mapped to the so-called machine flags. Particular machine flags are
- * then used to define ELF hardware capabilities; most notably hardware flags
- * that are essential for user space / glibc.
- *
- * Restrict the set of exposed CPU features to ELF hardware capabilities for
- * now. Additional machine flags can be indicated by values larger than
- * MAX_ELF_HWCAP_FEATURES.
- */
-#define MAX_ELF_HWCAP_FEATURES (8 * sizeof(elf_hwcap))
-#define MAX_CPU_FEATURES MAX_ELF_HWCAP_FEATURES
-
-#define cpu_feature(feat) ilog2(HWCAP_ ## feat)
+#define cpu_feature(feature) (feature)
int cpu_have_feature(unsigned int nr);
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 267a8f88e143..adf7d8cdac7e 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -95,7 +95,8 @@ union ctlreg0 {
Interruption-Filtering Override */
unsigned long : 3;
unsigned long ccc : 1; /* Cryptography counter control */
- unsigned long : 18;
+ unsigned long pec : 1; /* PAI extension control */
+ unsigned long : 17;
unsigned long : 3;
unsigned long lap : 1; /* Low-address-protection control */
unsigned long : 4;
diff --git a/arch/s390/include/asm/dma.h b/arch/s390/include/asm/dma.h
index 6f26f35d4a71..dec1c4ce628c 100644
--- a/arch/s390/include/asm/dma.h
+++ b/arch/s390/include/asm/dma.h
@@ -11,10 +11,4 @@
*/
#define MAX_DMA_ADDRESS 0x80000000
-#ifdef CONFIG_PCI
-extern int isa_dma_bridge_buggy;
-#else
-#define isa_dma_bridge_buggy (0)
-#endif
-
#endif /* _ASM_S390_DMA_H */
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index e08c882dccaa..eaeaeb3ff0be 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -17,7 +17,8 @@
"3: jl 1b\n" \
" lhi %0,0\n" \
"4: sacf 768\n" \
- EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
+ EX_TABLE(0b,4b) EX_TABLE(1b,4b) \
+ EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
: "=d" (ret), "=&d" (oldval), "=&d" (newval), \
"=m" (*uaddr) \
: "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 40264f60b0da..5cc46e0dde62 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -147,5 +147,42 @@ int gmap_mprotect_notify(struct gmap *, unsigned long start,
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
int gmap_mark_unmergeable(void);
-void s390_reset_acc(struct mm_struct *mm);
+void s390_unlist_old_asce(struct gmap *gmap);
+int s390_replace_asce(struct gmap *gmap);
+void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
+int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, bool interruptible);
+
+/**
+ * s390_uv_destroy_range - Destroy a range of pages in the given mm.
+ * @mm: the mm on which to operate
+ * @start: the start of the range
+ * @end: the end of the range
+ *
+ * This function will call cond_resched(), so it should not generate stalls, but
+ * it will otherwise only return when it has completed.
+ */
+static inline void s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end)
+{
+ (void)__s390_uv_destroy_range(mm, start, end, false);
+}
+
+/**
+ * s390_uv_destroy_range_interruptible - Destroy a range of pages in the
+ * given mm, but stop when a fatal signal is received.
+ * @mm: the mm on which to operate
+ * @start: the start of the range
+ * @end: the end of the range
+ *
+ * This function will call cond_resched(), so it should not generate stalls. If
+ * a fatal signal is received, it will return with -EINTR immediately,
+ * without finishing destroying the whole range. Upon successful
+ * completion, 0 is returned.
+ */
+static inline int s390_uv_destroy_range_interruptible(struct mm_struct *mm, unsigned long start,
+ unsigned long end)
+{
+ return __s390_uv_destroy_range(mm, start, end, true);
+}
#endif /* _ASM_S390_GMAP_H */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index f22beda9e6d5..ccdbccfde148 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -28,9 +28,11 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
static inline int prepare_hugepage_range(struct file *file,
unsigned long addr, unsigned long len)
{
- if (len & ~HPAGE_MASK)
+ struct hstate *h = hstate_file(file);
+
+ if (len & ~huge_page_mask(h))
return -EINVAL;
- if (addr & ~HPAGE_MASK)
+ if (addr & ~huge_page_mask(h))
return -EINVAL;
return 0;
}
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 916cfcb36d8a..895f774bbcc5 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -10,7 +10,6 @@
#include <linux/stringify.h>
#define JUMP_LABEL_NOP_SIZE 6
-#define JUMP_LABEL_NOP_OFFSET 2
#ifdef CONFIG_CC_IS_CLANG
#define JUMP_LABEL_STATIC_KEY_CONSTRAINT "i"
@@ -21,12 +20,12 @@
#endif
/*
- * We use a brcl 0,2 instruction for jump labels at compile time so it
+ * We use a brcl 0,<offset> instruction for jump labels so it
* can be easily distinguished from a hotpatch generated instruction.
*/
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("0: brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n"
+ asm_volatile_goto("0: brcl 0,%l[label]\n"
".pushsection __jump_table,\"aw\"\n"
".balign 8\n"
".long 0b-.,%l[label]-.\n"
diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index 649ecdcc8734..1bd08eb56d5f 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -85,6 +85,17 @@ struct kimage_arch {
extern const struct kexec_file_ops s390_kexec_image_ops;
extern const struct kexec_file_ops s390_kexec_elf_ops;
+#ifdef CONFIG_CRASH_DUMP
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
+#define crash_free_reserved_phys_range crash_free_reserved_phys_range
+
+void arch_kexec_protect_crashkres(void);
+#define arch_kexec_protect_crashkres arch_kexec_protect_crashkres
+
+void arch_kexec_unprotect_crashkres(void);
+#define arch_kexec_unprotect_crashkres arch_kexec_unprotect_crashkres
+#endif
+
#ifdef CONFIG_KEXEC_FILE
struct purgatory_info;
int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
@@ -92,5 +103,8 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
const Elf_Shdr *relsec,
const Elf_Shdr *symtab);
#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image);
+#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
#endif
#endif /*_S390_KEXEC_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 766028d54a3e..b1e98a9ed152 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -19,6 +19,8 @@
#include <linux/kvm.h>
#include <linux/seqlock.h>
#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/mmu_notifier.h>
#include <asm/debug.h>
#include <asm/cpu.h>
#include <asm/fpu/api.h>
@@ -93,19 +95,30 @@ union ipte_control {
};
};
+union sca_utility {
+ __u16 val;
+ struct {
+ __u16 mtcr : 1;
+ __u16 reserved : 15;
+ };
+};
+
struct bsca_block {
union ipte_control ipte_control;
__u64 reserved[5];
__u64 mcn;
- __u64 reserved2;
+ union sca_utility utility;
+ __u8 reserved2[6];
struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
};
struct esca_block {
union ipte_control ipte_control;
- __u64 reserved1[7];
+ __u64 reserved1[6];
+ union sca_utility utility;
+ __u8 reserved2[6];
__u64 mcn[4];
- __u64 reserved2[20];
+ __u64 reserved3[20];
struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
};
@@ -249,12 +262,16 @@ struct kvm_s390_sie_block {
#define ECB_SPECI 0x08
#define ECB_SRSI 0x04
#define ECB_HOSTPROTINT 0x02
+#define ECB_PTF 0x01
__u8 ecb; /* 0x0061 */
#define ECB2_CMMA 0x80
#define ECB2_IEP 0x20
#define ECB2_PFMFI 0x08
#define ECB2_ESCA 0x04
+#define ECB2_ZPCI_LSI 0x02
__u8 ecb2; /* 0x0062 */
+#define ECB3_AISI 0x20
+#define ECB3_AISII 0x10
#define ECB3_DEA 0x08
#define ECB3_AES 0x04
#define ECB3_RI 0x01
@@ -759,6 +776,7 @@ struct kvm_vm_stat {
u64 inject_pfault_done;
u64 inject_service_signal;
u64 inject_virtio;
+ u64 aen_forward;
};
struct kvm_arch_memory_slot {
@@ -923,6 +941,8 @@ struct kvm_s390_pv {
u64 guest_len;
unsigned long stor_base;
void *stor_var;
+ bool dumping;
+ struct mmu_notifier mmu_notifier;
};
struct kvm_arch{
@@ -939,6 +959,7 @@ struct kvm_arch{
int use_cmma;
int use_pfmfi;
int use_skf;
+ int use_zpci_interp;
int user_cpu_state_ctrl;
int user_sigp;
int user_stsi;
@@ -962,6 +983,8 @@ struct kvm_arch{
DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
struct kvm_s390_gisa_interrupt gisa_int;
struct kvm_s390_pv pv;
+ struct list_head kzdev_list;
+ spinlock_t kzdev_list_lock;
};
#define KVM_HVA_ERR_BAD (-1UL)
@@ -1012,4 +1035,14 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+#define __KVM_HAVE_ARCH_VM_FREE
+void kvm_arch_free_vm(struct kvm *kvm);
+
+struct zpci_kvm_hook {
+ int (*kvm_register)(void *opaque, struct kvm *kvm);
+ void (*kvm_unregister)(void *opaque);
+};
+
+extern struct zpci_kvm_hook zpci_kvm_hook;
+
#endif
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 26fe5e535728..8aa1f6530a3e 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -203,7 +203,9 @@ struct lowcore {
__u8 pad_0x1400[0x1500-0x1400]; /* 0x1400 */
/* Cryptography-counter designation */
__u64 ccd; /* 0x1500 */
- __u8 pad_0x1508[0x1800-0x1508]; /* 0x1508 */
+ /* AI-extension counter designation */
+ __u64 aicd; /* 0x1508 */
+ __u8 pad_0x1510[0x1800-0x1510]; /* 0x1510 */
/* Transaction abort diagnostic block */
struct pgm_tdb pgm_tdb; /* 0x1800 */
diff --git a/arch/s390/include/asm/maccess.h b/arch/s390/include/asm/maccess.h
new file mode 100644
index 000000000000..c7fa838cf6b9
--- /dev/null
+++ b/arch/s390/include/asm/maccess.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_S390_MACCESS_H
+#define __ASM_S390_MACCESS_H
+
+#include <linux/types.h>
+
+struct iov_iter;
+
+extern unsigned long __memcpy_real_area;
+void memcpy_real_init(void);
+size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count);
+int memcpy_real(void *dest, unsigned long src, size_t count);
+#ifdef CONFIG_CRASH_DUMP
+int copy_oldmem_kernel(void *dst, unsigned long src, size_t count);
+#endif
+
+#endif /* __ASM_S390_MACCESS_H */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 82aae78e1315..829d68e2c685 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -18,7 +18,7 @@ typedef struct {
unsigned long asce_limit;
unsigned long vdso_base;
/* The mmu context belongs to a secure guest. */
- atomic_t is_protected;
+ atomic_t protected_count;
/*
* The following bitfields need a down_write on the mm
* semaphore when they are written to. As they are only
@@ -42,18 +42,4 @@ typedef struct {
.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
.context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list),
-static inline int tprot(unsigned long addr)
-{
- int rc = -EFAULT;
-
- asm volatile(
- " tprot 0(%1),0\n"
- "0: ipm %0\n"
- " srl %0,28\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "+d" (rc) : "a" (addr) : "cc");
- return rc;
-}
-
#endif
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index c7937f369e62..2a38af5a00c2 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -26,7 +26,7 @@ static inline int init_new_context(struct task_struct *tsk,
INIT_LIST_HEAD(&mm->context.gmap_list);
cpumask_clear(&mm->context.cpu_attach_mask);
atomic_set(&mm->context.flush_count, 0);
- atomic_set(&mm->context.is_protected, 0);
+ atomic_set(&mm->context.protected_count, 0);
mm->context.gmap_asce = 0;
mm->context.flush_mm = 0;
#ifdef CONFIG_PGSTE
diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
index d910d71b5bb5..7e9e99523e95 100644
--- a/arch/s390/include/asm/nospec-insn.h
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -2,8 +2,6 @@
#ifndef _ASM_S390_NOSPEC_ASM_H
#define _ASM_S390_NOSPEC_ASM_H
-#include <asm/alternative-asm.h>
-#include <asm/asm-offsets.h>
#include <asm/dwarf.h>
#ifdef __ASSEMBLY__
diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h
index 147a8d547ef9..0d1c74a7a650 100644
--- a/arch/s390/include/asm/os_info.h
+++ b/arch/s390/include/asm/os_info.h
@@ -8,6 +8,8 @@
#ifndef _ASM_S390_OS_INFO_H
#define _ASM_S390_OS_INFO_H
+#include <linux/uio.h>
+
#define OS_INFO_VERSION_MAJOR 1
#define OS_INFO_VERSION_MINOR 1
#define OS_INFO_MAGIC 0x4f53494e464f535aULL /* OSINFOSZ */
@@ -39,7 +41,6 @@ u32 os_info_csum(struct os_info *os_info);
#ifdef CONFIG_CRASH_DUMP
void *os_info_old_entry(int nr, unsigned long *size);
-int copy_oldmem_kernel(void *dst, unsigned long src, size_t count);
#else
static inline void *os_info_old_entry(int nr, unsigned long *size)
{
diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h
index 5b7e33ac6f0b..1a8a6b15d121 100644
--- a/arch/s390/include/asm/pai.h
+++ b/arch/s390/include/asm/pai.h
@@ -17,7 +17,9 @@ struct qpaci_info_block {
struct {
u64 : 8;
u64 num_cc : 8; /* # of supported crypto counters */
- u64 : 48;
+ u64 : 9;
+ u64 num_nnpa : 7; /* # of supported NNPA counters */
+ u64 : 32;
};
};
@@ -42,6 +44,8 @@ static inline int qpaci(struct qpaci_info_block *info)
#define PAI_CRYPTO_BASE 0x1000 /* First event number */
#define PAI_CRYPTO_MAXCTR 256 /* Max # of event counters */
#define PAI_CRYPTO_KERNEL_OFFSET 2048
+#define PAI_NNPA_BASE 0x1800 /* First event number */
+#define PAI_NNPA_MAXCTR 128 /* Max # of event counters */
DECLARE_STATIC_KEY_FALSE(pai_key);
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index fdb9745ee998..108e732d7b14 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -6,9 +6,9 @@
#include <linux/mutex.h>
#include <linux/iommu.h>
#include <linux/pci_hotplug.h>
-#include <asm-generic/pci.h>
#include <asm/pci_clp.h>
#include <asm/pci_debug.h>
+#include <asm/pci_insn.h>
#include <asm/sclp.h>
#define PCIBIOS_MIN_IO 0x1000
@@ -97,6 +97,7 @@ struct zpci_bar_struct {
};
struct s390_domain;
+struct kvm_zdev;
#define ZPCI_FUNCTIONS_PER_BUS 256
struct zpci_bus {
@@ -116,18 +117,20 @@ struct zpci_bus {
struct zpci_dev {
struct zpci_bus *zbus;
struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */
- struct list_head bus_next;
struct kref kref;
struct hotplug_slot hotplug_slot;
enum zpci_state state;
u32 fid; /* function ID, used by sclp */
u32 fh; /* function handle, used by insn's */
+ u32 gisa; /* GISA designation for passthrough */
u16 vfn; /* virtual function number */
u16 pchid; /* physical channel ID */
+ u16 maxstbl; /* Maximum store block size */
u8 pfgid; /* function group ID */
u8 pft; /* pci function type */
u8 port;
+ u8 dtsm; /* Supported DT mask */
u8 rid_available : 1;
u8 has_hp_slot : 1;
u8 has_resources : 1;
@@ -186,7 +189,10 @@ struct zpci_dev {
struct dentry *debugfs_dev;
+ /* IOMMU and passthrough */
struct s390_domain *s390_domain; /* s390 IOMMU domain data */
+ struct kvm_zdev *kzdev;
+ struct mutex kzdev_lock;
};
static inline bool zdev_enabled(struct zpci_dev *zdev)
@@ -198,6 +204,9 @@ extern const struct attribute_group *zpci_attr_groups[];
extern unsigned int s390_pci_force_floating __initdata;
extern unsigned int s390_pci_no_rid;
+extern union zpci_sic_iib *zpci_aipb;
+extern struct airq_iv *zpci_aif_sbv;
+
/* -----------------------------------------------------------------------------
Prototypes
----------------------------------------------------------------------------- */
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index 1f4b666e85ee..d6189ed14f84 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -153,9 +153,11 @@ struct clp_rsp_query_pci_grp {
u8 : 6;
u8 frame : 1;
u8 refresh : 1; /* TLB refresh mode */
- u16 reserved2;
+ u16 : 3;
+ u16 maxstbl : 13; /* Maximum store block size */
u16 mui;
- u16 : 16;
+ u8 dtsm; /* Supported DT mask */
+ u8 reserved3;
u16 maxfaal;
u16 : 4;
u16 dnoi : 12;
@@ -173,7 +175,8 @@ struct clp_req_set_pci {
u16 reserved2;
u8 oc; /* operation controls */
u8 ndas; /* number of dma spaces */
- u64 reserved3;
+ u32 reserved3;
+ u32 gisa; /* GISA designation */
} __packed;
/* Set PCI function response */
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
index 61cf9531f68f..e5f57cfe1d45 100644
--- a/arch/s390/include/asm/pci_insn.h
+++ b/arch/s390/include/asm/pci_insn.h
@@ -98,6 +98,15 @@ struct zpci_fib {
u32 gd;
} __packed __aligned(8);
+/* Set Interruption Controls Operation Controls */
+#define SIC_IRQ_MODE_ALL 0
+#define SIC_IRQ_MODE_SINGLE 1
+#define SIC_SET_AENI_CONTROLS 2
+#define SIC_IRQ_MODE_DIRECT 4
+#define SIC_IRQ_MODE_D_ALL 16
+#define SIC_IRQ_MODE_D_SINGLE 17
+#define SIC_IRQ_MODE_SET_CPU 18
+
/* directed interruption information block */
struct zpci_diib {
u32 : 1;
@@ -119,9 +128,20 @@ struct zpci_cdiib {
u64 : 64;
} __packed __aligned(8);
+/* adapter interruption parameters block */
+struct zpci_aipb {
+ u64 faisb;
+ u64 gait;
+ u16 : 13;
+ u16 afi : 3;
+ u32 : 32;
+ u16 faal;
+} __packed __aligned(8);
+
union zpci_sic_iib {
struct zpci_diib diib;
struct zpci_cdiib cdiib;
+ struct zpci_aipb aipb;
};
DECLARE_STATIC_KEY_FALSE(have_mio);
@@ -134,13 +154,6 @@ int __zpci_store(u64 data, u64 req, u64 offset);
int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len);
int __zpci_store_block(const u64 *data, u64 req, u64 offset);
void zpci_barrier(void);
-int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib);
-
-static inline int zpci_set_irq_ctrl(u16 ctl, u8 isc)
-{
- union zpci_sic_iib iib = {{0}};
-
- return __zpci_set_irq_ctrl(ctl, isc, &iib);
-}
+int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib);
#endif
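With the static inline wrapper removed, callers of zpci_set_irq_ctrl() now build and pass the iib themselves. A hedged sketch of the new calling convention; the enclosing helper is illustrative:

	#include <asm/pci_insn.h>

	/* Illustrative only: switch an interruption subclass to single-interrupt mode. */
	static int example_set_single_mode(u8 isc)
	{
		union zpci_sic_iib iib = {{0}};

		return zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, isc, &iib);
	}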
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index a397b072a580..f1cb9391190d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -424,23 +424,6 @@ static inline int is_module_addr(void *addr)
* implies read permission.
*/
/*xwr*/
-#define __P000 PAGE_NONE
-#define __P001 PAGE_RO
-#define __P010 PAGE_RO
-#define __P011 PAGE_RO
-#define __P100 PAGE_RX
-#define __P101 PAGE_RX
-#define __P110 PAGE_RX
-#define __P111 PAGE_RX
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_RO
-#define __S010 PAGE_RW
-#define __S011 PAGE_RW
-#define __S100 PAGE_RX
-#define __S101 PAGE_RX
-#define __S110 PAGE_RWX
-#define __S111 PAGE_RWX
/*
* Segment entry (large page) protection definitions.
@@ -525,7 +508,7 @@ static inline int mm_has_pgste(struct mm_struct *mm)
static inline int mm_is_protected(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
- if (unlikely(atomic_read(&mm->context.is_protected)))
+ if (unlikely(atomic_read(&mm->context.protected_count)))
return 1;
#endif
return 0;
@@ -1182,9 +1165,22 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
} else {
res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
}
- /* At this point the reference through the mapping is still present */
- if (mm_is_protected(mm) && pte_present(res))
- uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
+ /* Nothing to do */
+ if (!mm_is_protected(mm) || !pte_present(res))
+ return res;
+ /*
+ * At this point the reference through the mapping is still present.
+ * The notifier should have destroyed all protected vCPUs at this
+ * point, so the destroy should be successful.
+ */
+ if (full && !uv_destroy_owned_page(pte_val(res) & PAGE_MASK))
+ return res;
+ /*
+ * If something went wrong and the page could not be destroyed, or
+ * if this is not a mm teardown, the slower export is used as
+ * fallback instead.
+ */
+ uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
return res;
}
@@ -1781,6 +1777,10 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
extern int vmem_add_mapping(unsigned long start, unsigned long size);
extern void vmem_remove_mapping(unsigned long start, unsigned long size);
+extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc);
+extern int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot);
+extern void vmem_unmap_4k_page(unsigned long addr);
+extern pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc);
extern int s390_enable_sie(void);
extern int s390_enable_skey(void);
extern void s390_reset_cmma(struct mm_struct *mm);
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index bd66f8e34949..87be3e855bf7 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -186,9 +186,6 @@ struct pt_regs;
void show_registers(struct pt_regs *regs);
void show_cacheinfo(struct seq_file *m);
-/* Free all resources held by a thread. */
-static inline void release_thread(struct task_struct *tsk) { }
-
/* Free guarded storage control block */
void guarded_storage_release(struct task_struct *tsk);
void gs_load_bc_cb(struct pt_regs *regs);
@@ -306,23 +303,6 @@ static __always_inline void __noreturn disabled_wait(void)
#define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL
-extern int memcpy_real(void *, unsigned long, size_t);
-extern void memcpy_absolute(void *, void *, size_t);
-
-#define put_abs_lowcore(member, x) do { \
- unsigned long __abs_address = offsetof(struct lowcore, member); \
- __typeof__(((struct lowcore *)0)->member) __tmp = (x); \
- \
- memcpy_absolute(__va(__abs_address), &__tmp, sizeof(__tmp)); \
-} while (0)
-
-#define get_abs_lowcore(x, member) do { \
- unsigned long __abs_address = offsetof(struct lowcore, member); \
- __typeof__(((struct lowcore *)0)->member) *__ptr = &(x); \
- \
- memcpy_absolute(__ptr, __va(__abs_address), sizeof(*__ptr)); \
-} while (0)
-
extern int s390_isolate_bp(void);
extern int s390_isolate_bp_guest(void);
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 54ae2dc65e3b..2f983e0b95e0 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -133,9 +133,9 @@ struct slibe {
* @sb_count: number of storage blocks
* @sba: storage block element addresses
* @dcount: size of storage block elements
- * @user0: user defineable value
- * @res4: reserved paramater
- * @user1: user defineable value
+ * @user0: user definable value
+ * @res4: reserved parameter
+ * @user1: user definable value
*/
struct qaob {
u64 res0[6];
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 236b34b75ddb..9d4c7f71e070 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -17,6 +17,7 @@
#define EXT_SCCB_READ_CPU (3 * PAGE_SIZE)
#ifndef __ASSEMBLY__
+#include <linux/uio.h>
#include <asm/chpid.h>
#include <asm/cpu.h>
@@ -88,6 +89,10 @@ struct sclp_info {
unsigned char has_sipl : 1;
unsigned char has_dirq : 1;
unsigned char has_iplcc : 1;
+ unsigned char has_zpci_lsi : 1;
+ unsigned char has_aisii : 1;
+ unsigned char has_aeni : 1;
+ unsigned char has_aisi : 1;
unsigned int ibc;
unsigned int mtid;
unsigned int mtid_cp;
@@ -142,8 +147,7 @@ int sclp_pci_deconfigure(u32 fid);
int sclp_ap_configure(u32 apid);
int sclp_ap_deconfigure(u32 apid);
int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid);
-int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
-int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
+size_t memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count);
void sclp_ocf_cpc_name_copy(char *dst);
static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h
index 7ce584aff5bb..322bdcd4b616 100644
--- a/arch/s390/include/asm/scsw.h
+++ b/arch/s390/include/asm/scsw.h
@@ -215,6 +215,11 @@ union scsw {
#define SNS2_ENV_DATA_PRESENT 0x10
#define SNS2_INPRECISE_END 0x04
+/*
+ * architectured values for PPRC errors
+ */
+#define SNS7_INVALID_ON_SEC 0x0e
+
/**
* scsw_is_tm - check for transport mode scsw
* @scsw: pointer to scsw
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 7f5d4763357b..73ed2781073b 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -30,7 +30,8 @@ extern void smp_emergency_stop(void);
extern int smp_find_processor_id(u16 address);
extern int smp_store_status(int cpu);
-extern void smp_save_dump_cpus(void);
+extern void smp_save_dump_ipl_cpu(void);
+extern void smp_save_dump_secondary_cpus(void);
extern void smp_yield_cpu(int cpu);
extern void smp_cpu_set_polarization(int cpu, int val);
extern int smp_cpu_get_polarization(int cpu);
@@ -58,6 +59,7 @@ static inline void smp_cpus_done(unsigned int max_cpus)
{
}
+extern int smp_reinit_ipl_cpu(void);
extern int smp_rescan_cpus(void);
extern void __noreturn cpu_die(void);
extern void __cpu_die(unsigned int cpu);
diff --git a/arch/s390/include/asm/softirq_stack.h b/arch/s390/include/asm/softirq_stack.h
index fd17f25704bd..1ac5115d3115 100644
--- a/arch/s390/include/asm/softirq_stack.h
+++ b/arch/s390/include/asm/softirq_stack.h
@@ -5,9 +5,10 @@
#include <asm/lowcore.h>
#include <asm/stacktrace.h>
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
static inline void do_softirq_own_stack(void)
{
call_on_stack(0, S390_lowcore.async_stack, void, __do_softirq);
}
-
+#endif
#endif /* __ASM_S390_SOFTIRQ_STACK_H */
diff --git a/arch/s390/include/asm/termios.h b/arch/s390/include/asm/termios.h
deleted file mode 100644
index 46fa3020b41e..000000000000
--- a/arch/s390/include/asm/termios.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * S390 version
- *
- * Derived from "include/asm-i386/termios.h"
- */
-#ifndef _S390_TERMIOS_H
-#define _S390_TERMIOS_H
-
-#include <uapi/asm/termios.h>
-
-
-/* intr=^C quit=^\ erase=del kill=^U
- eof=^D vtime=\0 vmin=\1 sxtc=\0
- start=^Q stop=^S susp=^Z eol=\0
- reprint=^R discard=^U werase=^W lnext=^V
- eol2=\0
-*/
-#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
-
-#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2))
-#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2))
-
-#include <asm-generic/termios-base.h>
-
-#endif /* _S390_TERMIOS_H */
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index fe6407f0eb1b..3a5c8fb590e5 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -27,9 +27,6 @@ static inline void tlb_flush(struct mmu_gather *tlb);
static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
struct page *page, int page_size);
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-
#define tlb_flush tlb_flush
#define pte_free_tlb pte_free_tlb
#define pmd_free_tlb pmd_free_tlb
diff --git a/arch/s390/include/asm/tpi.h b/arch/s390/include/asm/tpi.h
index 1ac538b8cbf5..f76e5fdff23a 100644
--- a/arch/s390/include/asm/tpi.h
+++ b/arch/s390/include/asm/tpi.h
@@ -19,6 +19,19 @@ struct tpi_info {
u32 :12;
} __packed __aligned(4);
+/* I/O-Interruption Code as stored by TPI for an Adapter I/O */
+struct tpi_adapter_info {
+ u32 aism:8;
+ u32 :22;
+ u32 error:1;
+ u32 forward:1;
+ u32 reserved;
+ u32 adapter_IO:1;
+ u32 directed_irq:1;
+ u32 isc:3;
+ u32 :27;
+} __packed __aligned(4);
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_TPI_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index f4511e21d646..f7038b800cc3 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -39,7 +39,7 @@ _copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned
static __always_inline unsigned long __must_check
copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key)
{
- if (likely(check_copy_size(to, n, false)))
+ if (check_copy_size(to, n, false))
n = _copy_from_user_key(to, from, n, key);
return n;
}
@@ -50,7 +50,7 @@ _copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned l
static __always_inline unsigned long __must_check
copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key)
{
- if (likely(check_copy_size(from, n, true)))
+ if (check_copy_size(from, n, true))
n = _copy_to_user_key(to, from, n, key);
return n;
}
@@ -285,7 +285,6 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo
return __clear_user(to, n);
}
-int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count);
void *s390_kernel_write(void *dst, const void *src, size_t size);
int __noreturn __put_kernel_bad(void);
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
index 0bf06f1682d8..02462e7100c1 100644
--- a/arch/s390/include/asm/unwind.h
+++ b/arch/s390/include/asm/unwind.h
@@ -47,7 +47,7 @@ struct unwind_state {
static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state,
unsigned long ip)
{
- ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL);
+ ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *)state->sp);
if (is_kretprobe_trampoline(ip))
ip = kretprobe_find_ret_addr(state->task, (void *)state->sp, &state->kr_cur);
return ip;
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index cfea7b77a5b8..be3ef9dd6972 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -50,6 +50,10 @@
#define UVC_CMD_SET_UNSHARE_ALL 0x0340
#define UVC_CMD_PIN_PAGE_SHARED 0x0341
#define UVC_CMD_UNPIN_PAGE_SHARED 0x0342
+#define UVC_CMD_DUMP_INIT 0x0400
+#define UVC_CMD_DUMP_CONF_STOR_STATE 0x0401
+#define UVC_CMD_DUMP_CPU 0x0402
+#define UVC_CMD_DUMP_COMPLETE 0x0403
#define UVC_CMD_SET_SHARED_ACCESS 0x1000
#define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001
#define UVC_CMD_RETR_ATTEST 0x1020
@@ -77,6 +81,10 @@ enum uv_cmds_inst {
BIT_UVC_CMD_UNSHARE_ALL = 20,
BIT_UVC_CMD_PIN_PAGE_SHARED = 21,
BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22,
+ BIT_UVC_CMD_DUMP_INIT = 24,
+ BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE = 25,
+ BIT_UVC_CMD_DUMP_CPU = 26,
+ BIT_UVC_CMD_DUMP_COMPLETE = 27,
BIT_UVC_CMD_RETR_ATTEST = 28,
};
@@ -110,7 +118,16 @@ struct uv_cb_qui {
u8 reserved88[158 - 136]; /* 0x0088 */
u16 max_guest_cpu_id; /* 0x009e */
u64 uv_feature_indications; /* 0x00a0 */
- u8 reserveda8[200 - 168]; /* 0x00a8 */
+ u64 reserveda8; /* 0x00a8 */
+ u64 supp_se_hdr_versions; /* 0x00b0 */
+ u64 supp_se_hdr_pcf; /* 0x00b8 */
+ u64 reservedc0; /* 0x00c0 */
+ u64 conf_dump_storage_state_len; /* 0x00c8 */
+ u64 conf_dump_finalize_len; /* 0x00d0 */
+ u64 reservedd8; /* 0x00d8 */
+ u64 supp_att_req_hdr_ver; /* 0x00e0 */
+ u64 supp_att_pflags; /* 0x00e8 */
+ u8 reservedf0[256 - 240]; /* 0x00f0 */
} __packed __aligned(8);
/* Initialize Ultravisor */
@@ -240,6 +257,31 @@ struct uv_cb_attest {
u64 reserved168[4]; /* 0x0168 */
} __packed __aligned(8);
+struct uv_cb_dump_cpu {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 cpu_handle;
+ u64 dump_area_origin;
+ u64 reserved28[5];
+} __packed __aligned(8);
+
+struct uv_cb_dump_stor_state {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 config_handle;
+ u64 dump_area_origin;
+ u64 gaddr;
+ u64 reserved28[4];
+} __packed __aligned(8);
+
+struct uv_cb_dump_complete {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 config_handle;
+ u64 dump_area_origin;
+ u64 reserved30[5];
+} __packed __aligned(8);
+
static inline int __uv_call(unsigned long r1, unsigned long r2)
{
int cc;
@@ -307,6 +349,12 @@ struct uv_info {
unsigned int max_num_sec_conf;
unsigned short max_guest_cpu_id;
unsigned long uv_feature_indications;
+ unsigned long supp_se_hdr_ver;
+ unsigned long supp_se_hdr_pcf;
+ unsigned long conf_dump_storage_state_len;
+ unsigned long conf_dump_finalize_len;
+ unsigned long supp_att_req_hdr_ver;
+ unsigned long supp_att_pflags;
};
extern struct uv_info uv_info;
@@ -378,6 +426,7 @@ static inline int is_prot_virt_host(void)
}
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
+int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
int uv_destroy_owned_page(unsigned long paddr);
int uv_convert_from_secure(unsigned long paddr);
int uv_convert_owned_from_secure(unsigned long paddr);
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
index 9ec86fae9980..93d1ccd3304c 100644
--- a/arch/s390/include/uapi/asm/dasd.h
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -183,6 +183,18 @@ typedef struct format_data_t {
} format_data_t;
/*
+ * struct dasd_copypair_swap_data_t
+ * represents all data necessary to issue a swap of the copy pair relation
+ */
+struct dasd_copypair_swap_data_t {
+ char primary[20]; /* BUSID of primary */
+ char secondary[20]; /* BUSID of secondary */
+
+ /* Reserved for future updates. */
+ __u8 reserved[64];
+};
+
+/*
* values to be used for format_data_t.intensity
* 0/8: normal format
* 1/9: also write record zero
@@ -326,6 +338,8 @@ struct dasd_snid_ioctl_data {
#define BIODASDSATTR _IOW(DASD_IOCTL_LETTER,2,attrib_data_t)
/* Release Allocated Space */
#define BIODASDRAS _IOW(DASD_IOCTL_LETTER, 3, format_data_t)
+/* Swap copy pair relation */
+#define BIODASDCOPYPAIRSWAP _IOW(DASD_IOCTL_LETTER, 4, struct dasd_copypair_swap_data_t)
/* Get Sense Path Group ID (SNID) data */
#define BIODASDSNID _IOWR(DASD_IOCTL_LETTER, 1, struct dasd_snid_ioctl_data)
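A hedged user-space sketch of how the new BIODASDCOPYPAIRSWAP ioctl might be issued; the device path handling, BUSID strings and error handling are placeholders, not part of this patch:

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <asm/dasd.h>

	/* Illustrative only: request a copy pair swap on an opened DASD device node. */
	static int swap_copy_pair(const char *dev, const char *primary, const char *secondary)
	{
		struct dasd_copypair_swap_data_t data = {};
		int fd, rc;

		strncpy(data.primary, primary, sizeof(data.primary) - 1);
		strncpy(data.secondary, secondary, sizeof(data.secondary) - 1);
		fd = open(dev, O_RDONLY);
		if (fd < 0)
			return -1;
		rc = ioctl(fd, BIODASDCOPYPAIRSWAP, &data);
		close(fd);
		return rc;
	}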
diff --git a/arch/s390/include/uapi/asm/hwctrset.h b/arch/s390/include/uapi/asm/hwctrset.h
index 3d8284b95f87..e56b9dd23a4b 100644
--- a/arch/s390/include/uapi/asm/hwctrset.h
+++ b/arch/s390/include/uapi/asm/hwctrset.h
@@ -30,18 +30,18 @@ struct s390_ctrset_start { /* Set CPUs to operate on */
struct s390_ctrset_setdata { /* Counter set data */
__u32 set; /* Counter set number */
__u32 no_cnts; /* # of counters stored in cv[] */
- __u64 cv[0]; /* Counter values (variable length) */
+ __u64 cv[]; /* Counter values (variable length) */
};
struct s390_ctrset_cpudata { /* Counter set data per CPU */
__u32 cpu_nr; /* CPU number */
__u32 no_sets; /* # of counters sets in data[] */
- struct s390_ctrset_setdata data[0];
+ struct s390_ctrset_setdata data[];
};
struct s390_ctrset_read { /* Structure to get all ctr sets */
__u64 no_cpus; /* Total # of CPUs data taken from */
- struct s390_ctrset_cpudata data[0];
+ struct s390_ctrset_cpudata data[];
};
#define S390_HWCTR_MAGIC 'C' /* Random magic # for ioctls */
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 7a6b14874d65..a73cf01a1606 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -74,6 +74,7 @@ struct kvm_s390_io_adapter_req {
#define KVM_S390_VM_CRYPTO 2
#define KVM_S390_VM_CPU_MODEL 3
#define KVM_S390_VM_MIGRATION 4
+#define KVM_S390_VM_CPU_TOPOLOGY 5
/* kvm attributes for mem_ctrl */
#define KVM_S390_VM_MEM_ENABLE_CMMA 0
diff --git a/arch/s390/include/uapi/asm/termios.h b/arch/s390/include/uapi/asm/termios.h
deleted file mode 100644
index 54223169c806..000000000000
--- a/arch/s390/include/uapi/asm/termios.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * S390 version
- *
- * Derived from "include/asm-i386/termios.h"
- */
-
-#ifndef _UAPI_S390_TERMIOS_H
-#define _UAPI_S390_TERMIOS_H
-
-#include <asm/termbits.h>
-#include <asm/ioctls.h>
-
-struct winsize {
- unsigned short ws_row;
- unsigned short ws_col;
- unsigned short ws_xpixel;
- unsigned short ws_ypixel;
-};
-
-#define NCC 8
-struct termio {
- unsigned short c_iflag; /* input mode flags */
- unsigned short c_oflag; /* output mode flags */
- unsigned short c_cflag; /* control mode flags */
- unsigned short c_lflag; /* local mode flags */
- unsigned char c_line; /* line discipline */
- unsigned char c_cc[NCC]; /* control characters */
-};
-
-/* modem lines */
-#define TIOCM_LE 0x001
-#define TIOCM_DTR 0x002
-#define TIOCM_RTS 0x004
-#define TIOCM_ST 0x008
-#define TIOCM_SR 0x010
-#define TIOCM_CTS 0x020
-#define TIOCM_CAR 0x040
-#define TIOCM_RNG 0x080
-#define TIOCM_DSR 0x100
-#define TIOCM_CD TIOCM_CAR
-#define TIOCM_RI TIOCM_RNG
-#define TIOCM_OUT1 0x2000
-#define TIOCM_OUT2 0x4000
-#define TIOCM_LOOP 0x8000
-
-/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
-
-
-#endif /* _UAPI_S390_TERMIOS_H */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 27d6b3c7aa06..5e6a23299790 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -33,16 +33,16 @@ CFLAGS_stacktrace.o += -fno-optimize-sibling-calls
CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls
-obj-y := traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
+obj-y := head64.o traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
-obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o
+obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o
obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
-obj-y += smp.o text_amode31.o stacktrace.o
+obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o
-extra-y += head64.o vmlinux.lds
+extra-y += vmlinux.lds
obj-$(CONFIG_SYSFS) += nospec-sysfs.o
CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
@@ -72,7 +72,7 @@ obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
-obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o
+obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o
obj-$(CONFIG_TRACEPOINTS) += trace.o
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c
new file mode 100644
index 000000000000..fb92e8ed0525
--- /dev/null
+++ b/arch/s390/kernel/abs_lowcore.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pgtable.h>
+#include <asm/abs_lowcore.h>
+
+#define ABS_LOWCORE_UNMAPPED 1
+#define ABS_LOWCORE_LAP_ON 2
+#define ABS_LOWCORE_IRQS_ON 4
+
+unsigned long __bootdata_preserved(__abs_lowcore);
+bool __ro_after_init abs_lowcore_mapped;
+
+int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc)
+{
+ unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore));
+ unsigned long phys = __pa(lc);
+ int rc, i;
+
+ for (i = 0; i < LC_PAGES; i++) {
+ rc = __vmem_map_4k_page(addr, phys, PAGE_KERNEL, alloc);
+ if (rc) {
+ /*
+ * Do not unmap allocated page tables in case the
+ * allocation was not requested. In such a case the
+			 * request is expected to come from an atomic context,
+ * while the unmap attempt might sleep.
+ */
+ if (alloc) {
+ for (--i; i >= 0; i--) {
+ addr -= PAGE_SIZE;
+ vmem_unmap_4k_page(addr);
+ }
+ }
+ return rc;
+ }
+ addr += PAGE_SIZE;
+ phys += PAGE_SIZE;
+ }
+ return 0;
+}
+
+void abs_lowcore_unmap(int cpu)
+{
+ unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore));
+ int i;
+
+ for (i = 0; i < LC_PAGES; i++) {
+ vmem_unmap_4k_page(addr);
+ addr += PAGE_SIZE;
+ }
+}
+
+struct lowcore *get_abs_lowcore(unsigned long *flags)
+{
+ unsigned long irq_flags;
+ union ctlreg0 cr0;
+ int cpu;
+
+ *flags = 0;
+ cpu = get_cpu();
+ if (abs_lowcore_mapped) {
+ return ((struct lowcore *)__abs_lowcore) + cpu;
+ } else {
+ if (cpu != 0)
+ panic("Invalid unmapped absolute lowcore access\n");
+ local_irq_save(irq_flags);
+ if (!irqs_disabled_flags(irq_flags))
+ *flags |= ABS_LOWCORE_IRQS_ON;
+ __ctl_store(cr0.val, 0, 0);
+ if (cr0.lap) {
+ *flags |= ABS_LOWCORE_LAP_ON;
+ __ctl_clear_bit(0, 28);
+ }
+ *flags |= ABS_LOWCORE_UNMAPPED;
+ return lowcore_ptr[0];
+ }
+}
+
+void put_abs_lowcore(struct lowcore *lc, unsigned long flags)
+{
+ if (abs_lowcore_mapped) {
+ if (flags)
+ panic("Invalid mapped absolute lowcore release\n");
+ } else {
+ if (smp_processor_id() != 0)
+ panic("Invalid mapped absolute lowcore access\n");
+ if (!(flags & ABS_LOWCORE_UNMAPPED))
+ panic("Invalid unmapped absolute lowcore release\n");
+ if (flags & ABS_LOWCORE_LAP_ON)
+ __ctl_set_bit(0, 28);
+ if (flags & ABS_LOWCORE_IRQS_ON)
+ local_irq_enable();
+ }
+ put_cpu();
+}
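The accessors above replace the old put_abs_lowcore()/get_abs_lowcore() macros from processor.h. A hedged sketch of the intended access pattern; the field written is illustrative, and real callers appear in the ipl and kexec changes later in this patch:

	#include <asm/abs_lowcore.h>

	/* Illustrative only: update a single absolute lowcore field. */
	static void example_update_abs_lowcore(unsigned long ipib)
	{
		struct lowcore *abs_lc;
		unsigned long flags;

		abs_lc = get_abs_lowcore(&flags);	/* disables preemption via get_cpu() */
		abs_lc->ipib = ipib;
		put_abs_lowcore(abs_lc, flags);		/* restores LAP/IRQ state when unmapped */
	}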
diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c
new file mode 100644
index 000000000000..1b2ae42a0c15
--- /dev/null
+++ b/arch/s390/kernel/cpufeature.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2022
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/bug.h>
+#include <asm/elf.h>
+
+enum {
+ TYPE_HWCAP,
+ TYPE_FACILITY,
+};
+
+struct s390_cpu_feature {
+ unsigned int type : 4;
+ unsigned int num : 28;
+};
+
+static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = {
+ [S390_CPU_FEATURE_MSA] = {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA},
+ [S390_CPU_FEATURE_VXRS] = {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS},
+ [S390_CPU_FEATURE_UV] = {.type = TYPE_FACILITY, .num = 158},
+};
+
+/*
+ * cpu_have_feature - Test CPU features on module initialization
+ */
+int cpu_have_feature(unsigned int num)
+{
+ struct s390_cpu_feature *feature;
+
+ if (WARN_ON_ONCE(num >= MAX_CPU_FEATURES))
+ return 0;
+ feature = &s390_cpu_features[num];
+ switch (feature->type) {
+ case TYPE_HWCAP:
+ return !!(elf_hwcap & BIT(feature->num));
+ case TYPE_FACILITY:
+ return test_facility(feature->num);
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(cpu_have_feature);
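A hedged sketch of how a module might consume this helper at initialization time; the module init function and the chosen feature are illustrative:

	#include <linux/cpufeature.h>

	/* Illustrative only: refuse to load when the MSA facility is not available. */
	static int __init example_module_init(void)
	{
		if (!cpu_have_feature(S390_CPU_FEATURE_MSA))
			return -ENODEV;
		return 0;
	}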
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index a2c1c55daec0..dd74fe664ed1 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -21,6 +21,7 @@
#include <asm/elf.h>
#include <asm/ipl.h>
#include <asm/sclp.h>
+#include <asm/maccess.h>
#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
@@ -63,7 +64,7 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
sa = memblock_alloc(sizeof(*sa), 8);
if (!sa)
- panic("Failed to allocate save area\n");
+ return NULL;
if (is_boot_cpu)
list_add(&sa->list, &dump_save_areas);
@@ -114,38 +115,15 @@ void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs)
memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128));
}
-/*
- * Return physical address for virtual address
- */
-static inline void *load_real_addr(void *addr)
-{
- unsigned long real_addr;
-
- asm volatile(
- " lra %0,0(%1)\n"
- " jz 0f\n"
- " la %0,0\n"
- "0:"
- : "=a" (real_addr) : "a" (addr) : "cc");
- return (void *)real_addr;
-}
-
-/*
- * Copy memory of the old, dumped system to a kernel space virtual address
- */
-int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
+static size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count)
{
- unsigned long len;
- void *ra;
- int rc;
+ size_t len, copied, res = 0;
while (count) {
if (!oldmem_data.start && src < sclp.hsa_size) {
/* Copy from zfcp/nvme dump HSA area */
len = min(count, sclp.hsa_size - src);
- rc = memcpy_hsa_kernel(dst, src, len);
- if (rc)
- return rc;
+ copied = memcpy_hsa_iter(iter, src, len);
} else {
/* Check for swapped kdump oldmem areas */
if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
@@ -157,56 +135,27 @@ int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
} else {
len = count;
}
- if (is_vmalloc_or_module_addr(dst)) {
- ra = load_real_addr(dst);
- len = min(PAGE_SIZE - offset_in_page(ra), len);
- } else {
- ra = dst;
- }
- if (memcpy_real(ra, src, len))
- return -EFAULT;
+ copied = memcpy_real_iter(iter, src, len);
}
- dst += len;
- src += len;
- count -= len;
+ count -= copied;
+ src += copied;
+ res += copied;
+ if (copied < len)
+ break;
}
- return 0;
+ return res;
}
-/*
- * Copy memory of the old, dumped system to a user space virtual address
- */
-static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
+int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
{
- unsigned long len;
- int rc;
+ struct iov_iter iter;
+ struct kvec kvec;
- while (count) {
- if (!oldmem_data.start && src < sclp.hsa_size) {
- /* Copy from zfcp/nvme dump HSA area */
- len = min(count, sclp.hsa_size - src);
- rc = memcpy_hsa_user(dst, src, len);
- if (rc)
- return rc;
- } else {
- /* Check for swapped kdump oldmem areas */
- if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
- src -= oldmem_data.start;
- len = min(count, oldmem_data.size - src);
- } else if (oldmem_data.start && src < oldmem_data.size) {
- len = min(count, oldmem_data.size - src);
- src += oldmem_data.start;
- } else {
- len = count;
- }
- rc = copy_to_user_real(dst, src, count);
- if (rc)
- return rc;
- }
- dst += len;
- src += len;
- count -= len;
- }
+ kvec.iov_base = dst;
+ kvec.iov_len = count;
+ iov_iter_kvec(&iter, WRITE, &kvec, 1, count);
+ if (copy_oldmem_iter(&iter, src, count) < count)
+ return -EFAULT;
return 0;
}
@@ -217,18 +166,9 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize,
unsigned long offset)
{
unsigned long src;
- int rc;
- if (!csize)
- return 0;
src = pfn_to_phys(pfn) + offset;
-
- /* XXX: pass the iov_iter down to a common function */
- if (iter_is_iovec(iter))
- rc = copy_oldmem_user(iter->iov->iov_base, src, csize);
- else
- rc = copy_oldmem_kernel(iter->kvec->iov_base, src, csize);
- return rc;
+ return copy_oldmem_iter(iter, src, csize);
}
/*
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 4331c7e6e1c0..d7a82066a638 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -250,7 +250,7 @@ static debug_info_t *debug_info_alloc(const char *name, int pages_per_area,
rc->level = level;
rc->buf_size = buf_size;
rc->entry_size = sizeof(debug_entry_t) + buf_size;
- strlcpy(rc->name, name, sizeof(rc->name));
+ strscpy(rc->name, name, sizeof(rc->name));
memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *));
memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *));
refcount_set(&(rc->ref_count), 0);
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 432c8c987256..6030fdd6997b 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -267,7 +267,7 @@ char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
static void __init setup_boot_command_line(void)
{
/* copy arch command line */
- strlcpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE);
+ strscpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE);
}
static void __init check_image_bootable(void)
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 1cc85b8ff42e..325cbf69ebbd 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -29,6 +29,7 @@
#include <asm/sclp.h>
#include <asm/checksum.h>
#include <asm/debug.h>
+#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
#include <asm/sections.h>
#include <asm/boot_data.h>
@@ -1642,12 +1643,16 @@ static struct shutdown_action __refdata dump_action = {
static void dump_reipl_run(struct shutdown_trigger *trigger)
{
unsigned long ipib = (unsigned long) reipl_block_actual;
+ struct lowcore *abs_lc;
+ unsigned long flags;
unsigned int csum;
csum = (__force unsigned int)
csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
- put_abs_lowcore(ipib, ipib);
- put_abs_lowcore(ipib_checksum, csum);
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->ipib = ipib;
+ abs_lc->ipib_checksum = csum;
+ put_abs_lowcore(abs_lc, flags);
dump_run(trigger);
}
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index 6bec000c6c1c..e808bb8bc0da 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -44,14 +44,8 @@ static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
panic("Corrupted kernel text");
}
-static struct insn orignop = {
- .opcode = 0xc004,
- .offset = JUMP_LABEL_NOP_OFFSET >> 1,
-};
-
static void jump_label_transform(struct jump_entry *entry,
- enum jump_label_type type,
- int init)
+ enum jump_label_type type)
{
void *code = (void *)jump_entry_code(entry);
struct insn old, new;
@@ -63,27 +57,22 @@ static void jump_label_transform(struct jump_entry *entry,
jump_label_make_branch(entry, &old);
jump_label_make_nop(entry, &new);
}
- if (init) {
- if (memcmp(code, &orignop, sizeof(orignop)))
- jump_label_bug(entry, &orignop, &new);
- } else {
- if (memcmp(code, &old, sizeof(old)))
- jump_label_bug(entry, &old, &new);
- }
+ if (memcmp(code, &old, sizeof(old)))
+ jump_label_bug(entry, &old, &new);
s390_kernel_write(code, &new, sizeof(new));
}
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
- jump_label_transform(entry, type, 0);
+ jump_label_transform(entry, type);
text_poke_sync();
}
bool arch_jump_label_transform_queue(struct jump_entry *entry,
enum jump_label_type type)
{
- jump_label_transform(entry, type, 0);
+ jump_label_transform(entry, type);
return true;
}
@@ -91,10 +80,3 @@ void arch_jump_label_transform_apply(void)
{
text_poke_sync();
}
-
-void __init_or_module arch_jump_label_transform_static(struct jump_entry *entry,
- enum jump_label_type type)
-{
- jump_label_transform(entry, type, 1);
- text_poke_sync();
-}
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index ab761c008f98..4579b42286d5 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -21,6 +21,7 @@
#include <asm/elf.h>
#include <asm/asm-offsets.h>
#include <asm/cacheflush.h>
+#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
#include <asm/set_memory.h>
#include <asm/stacktrace.h>
@@ -222,13 +223,18 @@ void machine_kexec_cleanup(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
+ struct lowcore *abs_lc;
+ unsigned long flags;
+
VMCOREINFO_SYMBOL(lowcore_ptr);
VMCOREINFO_SYMBOL(high_memory);
VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31);
vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31);
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
- put_abs_lowcore(vmcore_info, paddr_vmcoreinfo_note());
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->vmcore_info = paddr_vmcoreinfo_note();
+ put_abs_lowcore(abs_lc, flags);
}
void machine_shutdown(void)
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index 8f43575a4dd3..fc6d5f58debe 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -31,6 +31,7 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len)
const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1;
struct module_signature *ms;
unsigned long sig_len;
+ int ret;
/* Skip signature verification when not secure IPLed. */
if (!ipl_secure_flag)
@@ -65,11 +66,18 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len)
return -EBADMSG;
}
- return verify_pkcs7_signature(kernel, kernel_len,
- kernel + kernel_len, sig_len,
- VERIFY_USE_PLATFORM_KEYRING,
- VERIFYING_MODULE_SIGNATURE,
- NULL, NULL);
+ ret = verify_pkcs7_signature(kernel, kernel_len,
+ kernel + kernel_len, sig_len,
+ VERIFY_USE_SECONDARY_KEYRING,
+ VERIFYING_MODULE_SIGNATURE,
+ NULL, NULL);
+ if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING))
+ ret = verify_pkcs7_signature(kernel, kernel_len,
+ kernel + kernel_len, sig_len,
+ VERIFY_USE_PLATFORM_KEYRING,
+ VERIFYING_MODULE_SIGNATURE,
+ NULL, NULL);
+ return ret;
}
#endif /* CONFIG_KEXEC_SIG */
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 26125a9c436d..2d159b32885b 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -548,6 +548,5 @@ int module_finalize(const Elf_Ehdr *hdr,
#endif /* CONFIG_FUNCTION_TRACER */
}
- jump_label_apply_nops(me);
return 0;
}
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 53ed3884fe64..31cb9b00a36b 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -11,6 +11,7 @@
#include <linux/kernel_stat.h>
#include <linux/init.h>
#include <linux/errno.h>
+#include <linux/entry-common.h>
#include <linux/hardirq.h>
#include <linux/log2.h>
#include <linux/kprobes.h>
@@ -63,7 +64,7 @@ static inline unsigned long nmi_get_mcesa_size(void)
* structure. The structure is required for machine check happening
* early in the boot process.
*/
-static struct mcesa boot_mcesa __initdata __aligned(MCESA_MAX_SIZE);
+static struct mcesa boot_mcesa __aligned(MCESA_MAX_SIZE);
void __init nmi_alloc_mcesa_early(u64 *mcesad)
{
@@ -397,11 +398,12 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
static unsigned long long last_ipd;
struct mcck_struct *mcck;
unsigned long long tmp;
+ irqentry_state_t irq_state;
union mci mci;
unsigned long mcck_dam_code;
int mcck_pending = 0;
- nmi_enter();
+ irq_state = irqentry_nmi_enter(regs);
if (user_mode(regs))
update_timer_mcck();
@@ -504,14 +506,14 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
clear_cpu_flag(CIF_MCCK_GUEST);
if (user_mode(regs) && mcck_pending) {
- nmi_exit();
+ irqentry_nmi_exit(regs, irq_state);
return 1;
}
if (mcck_pending)
schedule_mcck_handler();
- nmi_exit();
+ irqentry_nmi_exit(regs, irq_state);
return 0;
}
NOKPROBE_SYMBOL(s390_do_machine_check);
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index 1acc2e05d70f..ec0bd9457e90 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -13,8 +13,9 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/checksum.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
+#include <asm/maccess.h>
#include <asm/asm-offsets.h>
/*
@@ -57,13 +58,16 @@ void os_info_entry_add(int nr, void *ptr, u64 size)
*/
void __init os_info_init(void)
{
- void *ptr = &os_info;
+ struct lowcore *abs_lc;
+ unsigned long flags;
os_info.version_major = OS_INFO_VERSION_MAJOR;
os_info.version_minor = OS_INFO_VERSION_MINOR;
os_info.magic = OS_INFO_MAGIC;
os_info.csum = os_info_csum(&os_info);
- put_abs_lowcore(os_info, __pa(ptr));
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->os_info = __pa(&os_info);
+ put_abs_lowcore(abs_lc, flags);
}
#ifdef CONFIG_CRASH_DUMP
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 483ab5e10164..f043a7ff220b 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -516,6 +516,26 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
return err;
}
+/* Events CPU_CYCLES and INSTRUCTIONS can be submitted with two different
+ * attribute::type values:
+ * - PERF_TYPE_HARDWARE:
+ * - pmu->type:
+ * Handle both types of invocation identically. They address the same hardware.
+ * The result is different when event modifiers exclude_kernel and/or
+ * exclude_user are also set.
+ */
+static int cpumf_pmu_event_type(struct perf_event *event)
+{
+ u64 ev = event->attr.config;
+
+ if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev ||
+ cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev ||
+ cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
+ cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev)
+ return PERF_TYPE_HARDWARE;
+ return PERF_TYPE_RAW;
+}
+
static int cpumf_pmu_event_init(struct perf_event *event)
{
unsigned int type = event->attr.type;
@@ -525,7 +545,7 @@ static int cpumf_pmu_event_init(struct perf_event *event)
err = __hw_perf_event_init(event, type);
else if (event->pmu->type == type)
/* Registered as unknown PMU */
- err = __hw_perf_event_init(event, PERF_TYPE_RAW);
+ err = __hw_perf_event_init(event, cpumf_pmu_event_type(event));
else
return -ENOENT;
@@ -644,6 +664,7 @@ static int cfdiag_push_sample(struct perf_event *event,
raw.frag.data = cpuhw->stop;
raw.size = raw.frag.size;
data.raw = &raw;
+ data.sample_flags |= PERF_SAMPLE_RAW;
}
overflow = perf_event_overflow(event, &data, &regs);
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index 8c1545946d85..6826e2a69a21 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -193,8 +193,9 @@ static int paicrypt_event_init(struct perf_event *event)
/* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */
if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
return -ENOENT;
- /* PAI crypto event must be valid */
- if (a->config > PAI_CRYPTO_BASE + paicrypt_cnt)
+ /* PAI crypto event must be in valid range */
+ if (a->config < PAI_CRYPTO_BASE ||
+ a->config > PAI_CRYPTO_BASE + paicrypt_cnt)
return -EINVAL;
/* Allow only CPU wide operation, no process context for now. */
if (event->hw.target || event->cpu == -1)
@@ -208,6 +209,12 @@ static int paicrypt_event_init(struct perf_event *event)
if (rc)
return rc;
+	/* Event initialization sets last_tag to 0. When the events are later
+	 * deleted and re-added, do not reset the event count value to zero.
+	 * Events are added, deleted and re-added when two or more events
+ * are active at the same time.
+ */
+ event->hw.last_tag = 0;
cpump->event = event;
event->destroy = paicrypt_event_destroy;
@@ -242,9 +249,12 @@ static void paicrypt_start(struct perf_event *event, int flags)
{
u64 sum;
- sum = paicrypt_getall(event); /* Get current value */
- local64_set(&event->hw.prev_count, sum);
- local64_set(&event->count, 0);
+ if (!event->hw.last_tag) {
+ event->hw.last_tag = 1;
+ sum = paicrypt_getall(event); /* Get current value */
+ local64_set(&event->count, 0);
+ local64_set(&event->hw.prev_count, sum);
+ }
}
static int paicrypt_add(struct perf_event *event, int flags)
@@ -356,6 +366,7 @@ static int paicrypt_push_sample(void)
raw.frag.data = cpump->save;
raw.size = raw.frag.size;
data.raw = &raw;
+ data.sample_flags |= PERF_SAMPLE_RAW;
}
overflow = perf_event_overflow(event, &data, &regs);
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
new file mode 100644
index 000000000000..74b53c531e0c
--- /dev/null
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support - Processor Activity Instrumentation Extension
+ * Facility
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+#define KMSG_COMPONENT "pai_ext"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/io.h>
+
+#include <asm/cpu_mcf.h>
+#include <asm/ctl_reg.h>
+#include <asm/pai.h>
+#include <asm/debug.h>
+
+#define PAIE1_CB_SZ 0x200 /* Size of PAIE1 control block */
+#define PAIE1_CTRBLOCK_SZ 0x400 /* Size of PAIE1 counter blocks */
+
+static debug_info_t *paiext_dbg;
+static unsigned int paiext_cnt; /* Extracted with QPACI instruction */
+
+enum paiext_mode {
+ PAI_MODE_NONE,
+ PAI_MODE_SAMPLING,
+ PAI_MODE_COUNTER,
+};
+
+struct pai_userdata {
+ u16 num;
+ u64 value;
+} __packed;
+
+/* Create the PAI extension 1 control block area.
+ * The PAI extension control block 1 is pointed to by lowcore
+ * address 0x1508 for each CPU. This control block is 512 bytes in size
+ * and requires a 512 byte boundary alignment.
+ */
+struct paiext_cb { /* PAI extension 1 control block */
+ u64 header; /* Not used */
+ u64 reserved1;
+ u64 acc; /* Addr to analytics counter control block */
+ u8 reserved2[488];
+} __packed;
+
+struct paiext_map {
+ unsigned long *area; /* Area for CPU to store counters */
+ struct pai_userdata *save; /* Area to store non-zero counters */
+ enum paiext_mode mode; /* Type of event */
+ unsigned int active_events; /* # of PAI Extension users */
+ unsigned int refcnt;
+ struct perf_event *event; /* Perf event for sampling */
+ struct paiext_cb *paiext_cb; /* PAI extension control block area */
+};
+
+struct paiext_mapptr {
+ struct paiext_map *mapptr;
+};
+
+static struct paiext_root { /* Anchor to per CPU data */
+ int refcnt; /* Overall active events */
+ struct paiext_mapptr __percpu *mapptr;
+} paiext_root;
+
+/* Free per CPU data when the last event is removed. */
+static void paiext_root_free(void)
+{
+ if (!--paiext_root.refcnt) {
+ free_percpu(paiext_root.mapptr);
+ paiext_root.mapptr = NULL;
+ }
+}
+
+/* On initialization of the first event, also allocate the per CPU data dynamically.
+ * Start with an array of pointers; the array size is the maximum number of
+ * CPUs possible, which might be larger than the number of CPUs currently
+ * online.
+ */
+static int paiext_root_alloc(void)
+{
+ if (++paiext_root.refcnt == 1) {
+ /* The memory is already zeroed. */
+ paiext_root.mapptr = alloc_percpu(struct paiext_mapptr);
+ if (!paiext_root.mapptr) {
+			/* Returning without refcnt adjustment is ok. The
+			 * error code is handled by paiext_alloc() which
+			 * decrements refcnt when an event cannot be
+			 * created.
+ */
+ return -ENOMEM;
+ }
+ }
+ return 0;
+}
+
+/* Protects against concurrent increments of the sampler and counter
+ * members and prohibits concurrent execution of counting and sampling
+ * events.
+ * Ensures that the analytics counter block is deallocated only when
+ * neither sampling nor counting is active on that CPU.
+ * For details see paiext_alloc().
+ */
+static DEFINE_MUTEX(paiext_reserve_mutex);
+
+/* Free all memory allocated for event counting/sampling setup */
+static void paiext_free(struct paiext_mapptr *mp)
+{
+ kfree(mp->mapptr->area);
+ kfree(mp->mapptr->paiext_cb);
+ kvfree(mp->mapptr->save);
+ kfree(mp->mapptr);
+ mp->mapptr = NULL;
+}
+
+/* Release the PMU if event is the last perf event */
+static void paiext_event_destroy(struct perf_event *event)
+{
+ struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
+ struct paiext_map *cpump = mp->mapptr;
+
+ mutex_lock(&paiext_reserve_mutex);
+ cpump->event = NULL;
+ if (!--cpump->refcnt) /* Last reference gone */
+ paiext_free(mp);
+ paiext_root_free();
+ mutex_unlock(&paiext_reserve_mutex);
+ debug_sprintf_event(paiext_dbg, 4, "%s cpu %d mapptr %p\n", __func__,
+ event->cpu, mp->mapptr);
+
+}
+
+/* Used to avoid races in checking concurrent access of counting and
+ * sampling for pai_extension events.
+ *
+ * Only one instance of event pai_ext/NNPA_ALL/ for sampling is
+ * allowed and when this event is running, no counting event is allowed.
+ * Several counting events are allowed in parallel, but no sampling event
+ * is allowed while one (or more) counting events are running.
+ *
+ * This function is called in process context and it is safe to block.
+ * When the event initialization function fails, no other callback will
+ * be invoked.
+ *
+ * Allocate the memory for the event.
+ */
+static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
+{
+ struct paiext_mapptr *mp;
+ struct paiext_map *cpump;
+ int rc;
+
+ mutex_lock(&paiext_reserve_mutex);
+
+ rc = paiext_root_alloc();
+ if (rc)
+ goto unlock;
+
+ mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
+ cpump = mp->mapptr;
+ if (!cpump) { /* Paiext_map allocated? */
+ rc = -ENOMEM;
+ cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
+ if (!cpump)
+ goto unlock;
+
+ /* Allocate memory for counter area and counter extraction.
+ * These are
+ * - a 512 byte block and requires 512 byte boundary alignment.
+ * - a 1KB block and requires 1KB boundary alignment.
+ * Only the first counting event has to allocate the area.
+ *
+ * Note: This works with commit 59bb47985c1d by default.
+ * Backporting this to kernels without this commit might
+ * need adjustment.
+ */
+ mp->mapptr = cpump;
+ cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL);
+ cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL);
+ cpump->save = kvmalloc_array(paiext_cnt + 1,
+ sizeof(struct pai_userdata),
+ GFP_KERNEL);
+ if (!cpump->save || !cpump->area || !cpump->paiext_cb) {
+ paiext_free(mp);
+ goto unlock;
+ }
+ cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
+ : PAI_MODE_COUNTER;
+ } else {
+ /* Multiple invocations, check what's active.
+ * Supported are multiple counter events or only one sampling
+ * event concurrently at any one time.
+ */
+ if (cpump->mode == PAI_MODE_SAMPLING ||
+ (cpump->mode == PAI_MODE_COUNTER && a->sample_period)) {
+ rc = -EBUSY;
+ goto unlock;
+ }
+ }
+
+ rc = 0;
+ cpump->event = event;
+ ++cpump->refcnt;
+
+unlock:
+ if (rc) {
+ /* Error in allocation of event, decrement anchor. Since
+ * the event is not created, its destroy() function is never
+ * invoked. Adjust the reference counter for the anchor.
+ */
+ paiext_root_free();
+ }
+ mutex_unlock(&paiext_reserve_mutex);
+ /* If rc is non-zero, no increment of counter/sampler was done. */
+ return rc;
+}
+
+/* The PAI extension 1 control block supports up to 128 entries. Return
+ * the index within PAIE1_CB given the event number. Also validate event
+ * number.
+ */
+static int paiext_event_valid(struct perf_event *event)
+{
+ u64 cfg = event->attr.config;
+
+ if (cfg >= PAI_NNPA_BASE && cfg <= PAI_NNPA_BASE + paiext_cnt) {
+ /* Offset NNPA in paiext_cb */
+ event->hw.config_base = offsetof(struct paiext_cb, acc);
+ return 0;
+ }
+ return -EINVAL;
+}
+
+/* Might be called on a different CPU than the one the event is intended for. */
+static int paiext_event_init(struct perf_event *event)
+{
+ struct perf_event_attr *a = &event->attr;
+ int rc;
+
+ /* PMU pai_ext registered as PERF_TYPE_RAW, check event type */
+ if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
+ return -ENOENT;
+ /* PAI extension event must be valid and in supported range */
+ rc = paiext_event_valid(event);
+ if (rc)
+ return rc;
+ /* Allow only CPU wide operation, no process context for now. */
+ if (event->hw.target || event->cpu == -1)
+ return -ENOENT;
+ /* Allow only event NNPA_ALL for sampling. */
+ if (a->sample_period && a->config != PAI_NNPA_BASE)
+ return -EINVAL;
+ /* Prohibit exclude_user event selection */
+ if (a->exclude_user)
+ return -EINVAL;
+
+ rc = paiext_alloc(a, event);
+ if (rc)
+ return rc;
+ event->hw.last_tag = 0;
+ event->destroy = paiext_event_destroy;
+
+ if (a->sample_period) {
+ a->sample_period = 1;
+ a->freq = 0;
+ /* Register for paiext_sched_task() to be called */
+ event->attach_state |= PERF_ATTACH_SCHED_CB;
+ /* Add raw data which are the memory mapped counters */
+ a->sample_type |= PERF_SAMPLE_RAW;
+ /* Turn off inheritance */
+ a->inherit = 0;
+ }
+
+ return 0;
+}
+
+static u64 paiext_getctr(struct paiext_map *cpump, int nr)
+{
+ return cpump->area[nr];
+}
+
+/* Read the counter values. Return value from location in buffer. For event
+ * NNPA_ALL sum up all events.
+ */
+static u64 paiext_getdata(struct perf_event *event)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ u64 sum = 0;
+ int i;
+
+ if (event->attr.config != PAI_NNPA_BASE)
+ return paiext_getctr(cpump, event->attr.config - PAI_NNPA_BASE);
+
+ for (i = 1; i <= paiext_cnt; i++)
+ sum += paiext_getctr(cpump, i);
+
+ return sum;
+}
+
+static u64 paiext_getall(struct perf_event *event)
+{
+ return paiext_getdata(event);
+}
+
+static void paiext_read(struct perf_event *event)
+{
+ u64 prev, new, delta;
+
+ prev = local64_read(&event->hw.prev_count);
+ new = paiext_getall(event);
+ local64_set(&event->hw.prev_count, new);
+ delta = new - prev;
+ local64_add(delta, &event->count);
+}
+
+static void paiext_start(struct perf_event *event, int flags)
+{
+ u64 sum;
+
+ if (event->hw.last_tag)
+ return;
+ event->hw.last_tag = 1;
+ sum = paiext_getall(event); /* Get current value */
+ local64_set(&event->hw.prev_count, sum);
+ local64_set(&event->count, 0);
+}
+
+static int paiext_add(struct perf_event *event, int flags)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ struct paiext_cb *pcb = cpump->paiext_cb;
+
+ if (++cpump->active_events == 1) {
+ S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb);
+ pcb->acc = virt_to_phys(cpump->area) | 0x1;
+ /* Enable CPU instruction lookup for PAIE1 control block */
+ __ctl_set_bit(0, 49);
+ debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
+ __func__, S390_lowcore.aicd, pcb->acc);
+ }
+ if (flags & PERF_EF_START && !event->attr.sample_period) {
+ /* Only counting needs initial counter value */
+ paiext_start(event, PERF_EF_RELOAD);
+ }
+ event->hw.state = 0;
+ if (event->attr.sample_period) {
+ cpump->event = event;
+ perf_sched_cb_inc(event->pmu);
+ }
+ return 0;
+}
+
+static void paiext_stop(struct perf_event *event, int flags)
+{
+ paiext_read(event);
+ event->hw.state = PERF_HES_STOPPED;
+}
+
+static void paiext_del(struct perf_event *event, int flags)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ struct paiext_cb *pcb = cpump->paiext_cb;
+
+ if (event->attr.sample_period)
+ perf_sched_cb_dec(event->pmu);
+ if (!event->attr.sample_period) {
+ /* Only counting needs to read counter */
+ paiext_stop(event, PERF_EF_UPDATE);
+ }
+ if (--cpump->active_events == 0) {
+ /* Disable CPU instruction lookup for PAIE1 control block */
+ __ctl_clear_bit(0, 49);
+ pcb->acc = 0;
+ S390_lowcore.aicd = 0;
+ debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
+ __func__, S390_lowcore.aicd, pcb->acc);
+ }
+}
+
+/* Create raw data and save it in buffer. Returns number of bytes copied.
+ * Saves only positive counter entries of the form
+ * 2 bytes: Number of counter
+ * 8 bytes: Value of counter
+ */
+static size_t paiext_copy(struct paiext_map *cpump)
+{
+ struct pai_userdata *userdata = cpump->save;
+ int i, outidx = 0;
+
+ for (i = 1; i <= paiext_cnt; i++) {
+ u64 val = paiext_getctr(cpump, i);
+
+ if (val) {
+ userdata[outidx].num = i;
+ userdata[outidx].value = val;
+ outidx++;
+ }
+ }
+ return outidx * sizeof(*userdata);
+}
+
+/* Write a sample when one or more counter values are nonzero.
+ *
+ * Note: The functions paiext_sched_task() and paiext_push_sample() are not
+ * invoked after function paiext_del() has been called because of function
+ * perf_sched_cb_dec().
+ * The functions paiext_sched_task() and paiext_push_sample() are only
+ * called when sampling is active. Function perf_sched_cb_inc()
+ * has been invoked to install function paiext_sched_task() as callback
+ * to run at context switch time (see paiext_add()).
+ *
+ * This causes function perf_event_context_sched_out() and
+ * perf_event_context_sched_in() to check whether the PMU has installed a
+ * sched_task() callback. That callback is not active after paiext_del()
+ * returns and has deleted the event on that CPU.
+ */
+static int paiext_push_sample(void)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ struct perf_event *event = cpump->event;
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+ struct pt_regs regs;
+ size_t rawsize;
+ int overflow;
+
+ rawsize = paiext_copy(cpump);
+ if (!rawsize) /* No incremented counters */
+ return 0;
+
+ /* Setup perf sample */
+ memset(&regs, 0, sizeof(regs));
+ memset(&raw, 0, sizeof(raw));
+ memset(&data, 0, sizeof(data));
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+ if (event->attr.sample_type & PERF_SAMPLE_TID) {
+ data.tid_entry.pid = task_tgid_nr(current);
+ data.tid_entry.tid = task_pid_nr(current);
+ }
+ if (event->attr.sample_type & PERF_SAMPLE_TIME)
+ data.time = event->clock();
+ if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
+ data.id = event->id;
+ if (event->attr.sample_type & PERF_SAMPLE_CPU)
+ data.cpu_entry.cpu = smp_processor_id();
+ if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ raw.frag.size = rawsize;
+ raw.frag.data = cpump->save;
+ raw.size = raw.frag.size;
+ data.raw = &raw;
+ data.sample_flags |= PERF_SAMPLE_RAW;
+ }
+
+ overflow = perf_event_overflow(event, &data, &regs);
+ perf_event_update_userpage(event);
+ /* Clear counter area after read */
+ memset(cpump->area, 0, PAIE1_CTRBLOCK_SZ);
+ return overflow;
+}
+
+/* Called on schedule-in and schedule-out. No access to event structure,
+ * but for sampling only event NNPA_ALL is allowed.
+ */
+static void paiext_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+ /* We started with a clean page on event installation. So read out
+ * results on schedule_out and if page was dirty, clear values.
+ */
+ if (!sched_in)
+ paiext_push_sample();
+}
+
+/* Attribute definitions for the PAI extension 1 interface. As with other CPU
+ * Measurement Facilities, there is one attribute per mapped counter.
+ * The number of mapped counters may vary per machine generation. Use
+ * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
+ * to determine the number of mapped counters. The instruction returns
+ * a positive number, which is the highest number of supported counters.
+ * All counters less than this number are also supported; there are no
+ * holes. A returned number of zero means no support for mapped counters.
+ *
+ * The identification of the counter is a unique number. The chosen range
+ * is 0x1800 + offset in mapped kernel page.
+ * All CPU Measurement Facility counter identifiers must be unique and
+ * the numbers from 0 to 496 are already used for the CPU Measurement
+ * Counter facility. Numbers 0x1000 to 0x103e are used for PAI cryptography
+ * counters.
+ * Numbers 0xb0000, 0xbc000 and 0xbd000 are already
+ * used for the CPU Measurement Sampling facility.
+ */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *paiext_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group paiext_events_group = {
+ .name = "events",
+ .attrs = NULL, /* Filled in attr_event_init() */
+};
+
+static struct attribute_group paiext_format_group = {
+ .name = "format",
+ .attrs = paiext_format_attr,
+};
+
+static const struct attribute_group *paiext_attr_groups[] = {
+ &paiext_events_group,
+ &paiext_format_group,
+ NULL,
+};
+
+/* Performance monitoring unit for mapped counters */
+static struct pmu paiext = {
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = paiext_event_init,
+ .add = paiext_add,
+ .del = paiext_del,
+ .start = paiext_start,
+ .stop = paiext_stop,
+ .read = paiext_read,
+ .sched_task = paiext_sched_task,
+ .attr_groups = paiext_attr_groups,
+};
+
+/* List of symbolic PAI extension 1 NNPA counter names. */
+static const char * const paiext_ctrnames[] = {
+ [0] = "NNPA_ALL",
+ [1] = "NNPA_ADD",
+ [2] = "NNPA_SUB",
+ [3] = "NNPA_MUL",
+ [4] = "NNPA_DIV",
+ [5] = "NNPA_MIN",
+ [6] = "NNPA_MAX",
+ [7] = "NNPA_LOG",
+ [8] = "NNPA_EXP",
+ [9] = "NNPA_IBM_RESERVED_9",
+ [10] = "NNPA_RELU",
+ [11] = "NNPA_TANH",
+ [12] = "NNPA_SIGMOID",
+ [13] = "NNPA_SOFTMAX",
+ [14] = "NNPA_BATCHNORM",
+ [15] = "NNPA_MAXPOOL2D",
+ [16] = "NNPA_AVGPOOL2D",
+ [17] = "NNPA_LSTMACT",
+ [18] = "NNPA_GRUACT",
+ [19] = "NNPA_CONVOLUTION",
+ [20] = "NNPA_MATMUL_OP",
+ [21] = "NNPA_MATMUL_OP_BCAST23",
+ [22] = "NNPA_SMALLBATCH",
+ [23] = "NNPA_LARGEDIM",
+ [24] = "NNPA_SMALLTENSOR",
+ [25] = "NNPA_1MFRAME",
+ [26] = "NNPA_2GFRAME",
+ [27] = "NNPA_ACCESSEXCEPT",
+};
+
+static void __init attr_event_free(struct attribute **attrs, int num)
+{
+ struct perf_pmu_events_attr *pa;
+ struct device_attribute *dap;
+ int i;
+
+ for (i = 0; i < num; i++) {
+ dap = container_of(attrs[i], struct device_attribute, attr);
+ pa = container_of(dap, struct perf_pmu_events_attr, attr);
+ kfree(pa);
+ }
+ kfree(attrs);
+}
+
+static int __init attr_event_init_one(struct attribute **attrs, int num)
+{
+ struct perf_pmu_events_attr *pa;
+
+ pa = kzalloc(sizeof(*pa), GFP_KERNEL);
+ if (!pa)
+ return -ENOMEM;
+
+ sysfs_attr_init(&pa->attr.attr);
+ pa->id = PAI_NNPA_BASE + num;
+ pa->attr.attr.name = paiext_ctrnames[num];
+ pa->attr.attr.mode = 0444;
+ pa->attr.show = cpumf_events_sysfs_show;
+ pa->attr.store = NULL;
+ attrs[num] = &pa->attr.attr;
+ return 0;
+}
+
+/* Create PMU sysfs event attributes on the fly. */
+static int __init attr_event_init(void)
+{
+ struct attribute **attrs;
+ int ret, i;
+
+ attrs = kmalloc_array(ARRAY_SIZE(paiext_ctrnames) + 1, sizeof(*attrs),
+ GFP_KERNEL);
+ if (!attrs)
+ return -ENOMEM;
+ for (i = 0; i < ARRAY_SIZE(paiext_ctrnames); i++) {
+ ret = attr_event_init_one(attrs, i);
+ if (ret) {
+ attr_event_free(attrs, i - 1);
+ return ret;
+ }
+ }
+ attrs[i] = NULL;
+ paiext_events_group.attrs = attrs;
+ return 0;
+}
+
+static int __init paiext_init(void)
+{
+ struct qpaci_info_block ib;
+ int rc = -ENOMEM;
+
+ if (!test_facility(197))
+ return 0;
+
+ qpaci(&ib);
+ paiext_cnt = ib.num_nnpa;
+ if (paiext_cnt >= PAI_NNPA_MAXCTR)
+ paiext_cnt = PAI_NNPA_MAXCTR;
+ if (!paiext_cnt)
+ return 0;
+
+ rc = attr_event_init();
+ if (rc) {
+ pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n");
+ return rc;
+ }
+
+ /* Setup s390dbf facility */
+ paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128);
+ if (!paiext_dbg) {
+ pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n");
+ rc = -ENOMEM;
+ goto out_init;
+ }
+ debug_register_view(paiext_dbg, &debug_sprintf_view);
+
+ rc = perf_pmu_register(&paiext, KMSG_COMPONENT, -1);
+ if (rc) {
+ pr_err("Registration of " KMSG_COMPONENT " PMU failed with "
+ "rc=%i\n", rc);
+ goto out_pmu;
+ }
+
+ return 0;
+
+out_pmu:
+ debug_unregister_view(paiext_dbg, &debug_sprintf_view);
+ debug_unregister(paiext_dbg);
+out_init:
+ attr_event_free(paiext_events_group.attrs,
+ ARRAY_SIZE(paiext_ctrnames) + 1);
+ return rc;
+}
+
+device_initcall(paiext_init);
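
From user space the new PMU is driven through the regular perf interface. As an illustration only (not part of this patch), the sketch below opens a CPU-wide counting event for NNPA_ALL on CPU 0; it assumes the PMU shows up as "pai_ext" under /sys/bus/event_source/devices/ and that the raw config value 0x1800 corresponds to PAI_NNPA_BASE, i.e. NNPA_ALL, as described in the attribute comment above.

/* Hedged user-space sketch: count NNPA_ALL via the pai_ext PMU on CPU 0. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	unsigned int type;
	FILE *f;
	int fd;

	/* The PMU type is assigned dynamically; read it from sysfs (assumed path). */
	f = fopen("/sys/bus/event_source/devices/pai_ext/type", "r");
	if (!f)
		return 1;
	if (fscanf(f, "%u", &type) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = 0x1800;	/* NNPA_ALL, assuming PAI_NNPA_BASE == 0x1800 */
	attr.disabled = 1;

	/* CPU-wide event (pid == -1, cpu == 0), as required by paiext_event_init(). */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;
	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	sleep(1);	/* the workload of interest would run here */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("NNPA_ALL: %lld\n", count);
	close(fd);
	return 0;
}

For sampling, only pai_ext/NNPA_ALL/ is accepted, and the counter values are delivered as PERF_SAMPLE_RAW data made up of the struct pai_userdata entries defined above: a 2-byte counter number followed by an 8-byte counter value for each nonzero counter.
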
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 89949b9f3cf8..42af4b3aa02b 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -91,6 +91,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
memcpy(dst, src, arch_task_struct_size);
dst->thread.fpu.regs = dst->thread.fpu.fprs;
+
+ /*
+ * Don't transfer over the runtime instrumentation or the guarded
+ * storage control block pointers. These fields are cleared here instead
+ * of in copy_thread() to avoid premature freeing of associated memory
+ * on fork() failure. Wait to clear the RI flag because ->stack still
+ * refers to the source thread.
+ */
+ dst->thread.ri_cb = NULL;
+ dst->thread.gs_cb = NULL;
+ dst->thread.gs_bc_cb = NULL;
+
return 0;
}
@@ -150,13 +162,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
frame->childregs.flags = 0;
if (new_stackp)
frame->childregs.gprs[15] = new_stackp;
-
- /* Don't copy runtime instrumentation info */
- p->thread.ri_cb = NULL;
+ /*
+ * Clear the runtime instrumentation flag after the above childregs
+ * copy. The CB pointer was already cleared in arch_dup_task_struct().
+ */
frame->childregs.psw.mask &= ~PSW_MASK_RI;
- /* Don't copy guarded storage control block */
- p->thread.gs_cb = NULL;
- p->thread.gs_bc_cb = NULL;
/* Set a new TLS ? */
if (clone_flags & CLONE_SETTLS) {
@@ -214,13 +224,13 @@ unsigned long __get_wchan(struct task_struct *p)
unsigned long arch_align_stack(unsigned long sp)
{
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
- sp -= get_random_int() & ~PAGE_MASK;
+ sp -= prandom_u32_max(PAGE_SIZE);
return sp & ~0xf;
}
static inline unsigned long brk_rnd(void)
{
- return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT;
+ return (get_random_u16() & BRK_RND_MASK) << PAGE_SHIFT;
}
unsigned long arch_randomize_brk(struct mm_struct *mm)
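
The two randomization hunks replace open-coded masking of get_random_int() with helpers that bound the result directly. Purely as an illustrative user-space model (PAGE_SIZE fixed at 4KB, rand() standing in for the kernel RNG helpers, prandom_u32_max() modelled as a plain modulo), both forms yield a stack offset in the range [0, PAGE_SIZE):

/* Illustrative model only: mask-based offset vs. bounded offset. */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long r, old_off, new_off;

	srand(time(NULL));
	r = (unsigned long)rand();			/* stand-in for get_random_int() */
	old_off = r & ~PAGE_MASK;			/* old: keep only the page-offset bits */
	new_off = (unsigned long)rand() % PAGE_SIZE;	/* models prandom_u32_max(PAGE_SIZE) */
	printf("old offset %lu, new offset %lu, both below %lu\n",
	       old_off, new_off, PAGE_SIZE);
	return 0;
}
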
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index aa0e0e7fc773..a194611ba88c 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -8,7 +8,6 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/stop_machine.h>
-#include <linux/cpufeature.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/random.h>
@@ -96,15 +95,6 @@ void cpu_init(void)
enter_lazy_tlb(&init_mm, current);
}
-/*
- * cpu_have_feature - Test CPU features on module initialization
- */
-int cpu_have_feature(unsigned int num)
-{
- return elf_hwcap & (1UL << num);
-}
-EXPORT_SYMBOL(cpu_have_feature);
-
static void show_facilities(struct seq_file *m)
{
unsigned int bit;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 8d91eccc0963..ab19ddb09d65 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -58,7 +58,7 @@
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/page.h>
@@ -74,6 +74,7 @@
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/mem_detect.h>
+#include <asm/maccess.h>
#include <asm/uv.h>
#include <asm/asm-offsets.h>
#include "entry.h"
@@ -395,6 +396,7 @@ void __init arch_call_rest_init(void)
{
unsigned long stack;
+ smp_reinit_ipl_cpu();
stack = stack_alloc();
if (!stack)
panic("Couldn't allocate kernel stack");
@@ -411,8 +413,9 @@ void __init arch_call_rest_init(void)
static void __init setup_lowcore_dat_off(void)
{
unsigned long int_psw_mask = PSW_KERNEL_BITS;
+ struct lowcore *abs_lc, *lc;
unsigned long mcck_stack;
- struct lowcore *lc;
+ unsigned long flags;
if (IS_ENABLED(CONFIG_KASAN))
int_psw_mask |= PSW_MASK_DAT;
@@ -474,19 +477,21 @@ static void __init setup_lowcore_dat_off(void)
lc->restart_data = 0;
lc->restart_source = -1U;
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->restart_stack = lc->restart_stack;
+ abs_lc->restart_fn = lc->restart_fn;
+ abs_lc->restart_data = lc->restart_data;
+ abs_lc->restart_source = lc->restart_source;
+ abs_lc->restart_psw = lc->restart_psw;
+ abs_lc->mcesad = lc->mcesad;
+ put_abs_lowcore(abs_lc, flags);
+
mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
if (!mcck_stack)
panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
__func__, THREAD_SIZE, THREAD_SIZE);
lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;
- /* Setup absolute zero lowcore */
- put_abs_lowcore(restart_stack, lc->restart_stack);
- put_abs_lowcore(restart_fn, lc->restart_fn);
- put_abs_lowcore(restart_data, lc->restart_data);
- put_abs_lowcore(restart_source, lc->restart_source);
- put_abs_lowcore(restart_psw, lc->restart_psw);
-
lc->spinlock_lockval = arch_spin_lockval(0);
lc->spinlock_index = 0;
arch_spin_lock_setup(0);
@@ -500,20 +505,25 @@ static void __init setup_lowcore_dat_off(void)
static void __init setup_lowcore_dat_on(void)
{
- struct lowcore *lc = lowcore_ptr[0];
- int cr;
+ struct lowcore *abs_lc;
+ unsigned long flags;
__ctl_clear_bit(0, 28);
S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
- __ctl_store(S390_lowcore.cregs_save_area, 0, 15);
__ctl_set_bit(0, 28);
- put_abs_lowcore(restart_flags, RESTART_FLAG_CTLREGS);
- put_abs_lowcore(program_new_psw, lc->program_new_psw);
- for (cr = 0; cr < ARRAY_SIZE(lc->cregs_save_area); cr++)
- put_abs_lowcore(cregs_save_area[cr], lc->cregs_save_area[cr]);
+ __ctl_store(S390_lowcore.cregs_save_area, 0, 15);
+ if (abs_lowcore_map(0, lowcore_ptr[0], true))
+ panic("Couldn't setup absolute lowcore");
+ abs_lowcore_mapped = true;
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
+ abs_lc->program_new_psw = S390_lowcore.program_new_psw;
+ memcpy(abs_lc->cregs_save_area, S390_lowcore.cregs_save_area,
+ sizeof(abs_lc->cregs_save_area));
+ put_abs_lowcore(abs_lc, flags);
}
static struct resource code_resource = {
@@ -875,6 +885,9 @@ static void __init setup_randomness(void)
if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
memblock_free(vmms, PAGE_SIZE);
+
+ if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
+ static_branch_enable(&s390_arch_random_available);
}
/*
@@ -1016,10 +1029,10 @@ void __init setup_arch(char **cmdline_p)
reserve_crashkernel();
#ifdef CONFIG_CRASH_DUMP
/*
- * Be aware that smp_save_dump_cpus() triggers a system reset.
+ * Be aware that smp_save_dump_secondary_cpus() triggers a system reset.
* Therefore CPU and device initialization should be done afterwards.
*/
- smp_save_dump_cpus();
+ smp_save_dump_secondary_cpus();
#endif
setup_resources();
@@ -1038,12 +1051,15 @@ void __init setup_arch(char **cmdline_p)
* Create kernel page tables and switch to virtual addressing.
*/
paging_init();
-
+ memcpy_real_init();
/*
* After paging_init created the kernel page table, the new PSWs
* in lowcore can now run with DAT enabled.
*/
setup_lowcore_dat_on();
+#ifdef CONFIG_CRASH_DUMP
+ smp_save_dump_ipl_cpu();
+#endif
/* Setup default console */
conmode_default();
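
A recurring pattern in this series is that the old per-member put_abs_lowcore(member, value) stores are replaced by mapping the absolute lowcore once and accessing it like any other struct lowcore. Sketched outside of any particular call site (the restart_data field is just an example), the access pattern used in the hunks above and in smp.c below is:

/* Sketch of the pointer-based absolute lowcore access pattern. */
#include <asm/abs_lowcore.h>

static void example_set_restart_data(unsigned long data)
{
	struct lowcore *abs_lc;
	unsigned long flags;

	abs_lc = get_abs_lowcore(&flags);	/* map the absolute lowcore */
	abs_lc->restart_data = data;		/* ordinary structure access */
	put_abs_lowcore(abs_lc, flags);		/* release the mapping again */
}
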
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 30c91d565933..0031325ce4bc 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -45,7 +45,7 @@
#include <asm/irq.h>
#include <asm/tlbflush.h>
#include <asm/vtimer.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/sclp.h>
#include <asm/debug.h>
#include <asm/os_info.h>
@@ -55,6 +55,7 @@
#include <asm/stacktrace.h>
#include <asm/topology.h>
#include <asm/vdso.h>
+#include <asm/maccess.h>
#include "entry.h"
enum {
@@ -212,10 +213,14 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
lc->preempt_count = PREEMPT_DISABLED;
if (nmi_alloc_mcesa(&lc->mcesad))
goto out;
+ if (abs_lowcore_map(cpu, lc, true))
+ goto out_mcesa;
lowcore_ptr[cpu] = lc;
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc));
return 0;
+out_mcesa:
+ nmi_free_mcesa(&lc->mcesad);
out:
stack_free(mcck_stack);
stack_free(async_stack);
@@ -237,6 +242,7 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET;
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
lowcore_ptr[cpu] = NULL;
+ abs_lowcore_unmap(cpu);
nmi_free_mcesa(&lc->mcesad);
stack_free(async_stack);
stack_free(mcck_stack);
@@ -315,9 +321,12 @@ static void pcpu_delegate(struct pcpu *pcpu,
pcpu_delegate_fn *func,
void *data, unsigned long stack)
{
- struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
- unsigned int source_cpu = stap();
+ struct lowcore *lc, *abs_lc;
+ unsigned int source_cpu;
+ unsigned long flags;
+ lc = lowcore_ptr[pcpu - pcpu_devices];
+ source_cpu = stap();
__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
if (pcpu->address == source_cpu) {
call_on_stack(2, stack, void, __pcpu_delegate,
@@ -332,10 +341,12 @@ static void pcpu_delegate(struct pcpu *pcpu,
lc->restart_data = (unsigned long)data;
lc->restart_source = source_cpu;
} else {
- put_abs_lowcore(restart_stack, stack);
- put_abs_lowcore(restart_fn, (unsigned long)func);
- put_abs_lowcore(restart_data, (unsigned long)data);
- put_abs_lowcore(restart_source, source_cpu);
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->restart_stack = stack;
+ abs_lc->restart_fn = (unsigned long)func;
+ abs_lc->restart_data = (unsigned long)data;
+ abs_lc->restart_source = source_cpu;
+ put_abs_lowcore(abs_lc, flags);
}
__bpon();
asm volatile(
@@ -581,6 +592,8 @@ static DEFINE_SPINLOCK(ctl_lock);
void smp_ctl_set_clear_bit(int cr, int bit, bool set)
{
struct ec_creg_mask_parms parms = { .cr = cr, };
+ struct lowcore *abs_lc;
+ unsigned long flags;
u64 ctlreg;
if (set) {
@@ -591,9 +604,11 @@ void smp_ctl_set_clear_bit(int cr, int bit, bool set)
parms.andval = ~(1UL << bit);
}
spin_lock(&ctl_lock);
- get_abs_lowcore(ctlreg, cregs_save_area[cr]);
+ abs_lc = get_abs_lowcore(&flags);
+ ctlreg = abs_lc->cregs_save_area[cr];
ctlreg = (ctlreg & parms.andval) | parms.orval;
- put_abs_lowcore(cregs_save_area[cr], ctlreg);
+ abs_lc->cregs_save_area[cr] = ctlreg;
+ put_abs_lowcore(abs_lc, flags);
spin_unlock(&ctl_lock);
on_each_cpu(smp_ctl_bit_callback, &parms, 1);
}
@@ -650,35 +665,36 @@ int smp_store_status(int cpu)
* This case does not exist for s390 anymore, setup_arch explicitly
* deactivates the elfcorehdr= kernel parameter
*/
-static __init void smp_save_cpu_vxrs(struct save_area *sa, u16 addr,
- bool is_boot_cpu, __vector128 *vxrs)
+static bool dump_available(void)
{
- if (is_boot_cpu)
- vxrs = boot_cpu_vector_save_area;
- else
- __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(vxrs));
- save_area_add_vxrs(sa, vxrs);
+ return oldmem_data.start || is_ipl_type_dump();
}
-static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr,
- bool is_boot_cpu, void *regs)
+void __init smp_save_dump_ipl_cpu(void)
{
- if (is_boot_cpu)
- copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
- else
- __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(regs));
+ struct save_area *sa;
+ void *regs;
+
+ if (!dump_available())
+ return;
+ sa = save_area_alloc(true);
+ regs = memblock_alloc(512, 8);
+ if (!sa || !regs)
+ panic("could not allocate memory for boot CPU save area\n");
+ copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
save_area_add_regs(sa, regs);
+ memblock_free(regs, 512);
+ if (MACHINE_HAS_VX)
+ save_area_add_vxrs(sa, boot_cpu_vector_save_area);
}
-void __init smp_save_dump_cpus(void)
+void __init smp_save_dump_secondary_cpus(void)
{
int addr, boot_cpu_addr, max_cpu_addr;
struct save_area *sa;
- bool is_boot_cpu;
void *page;
- if (!(oldmem_data.start || is_ipl_type_dump()))
- /* No previous system present, normal boot. */
+ if (!dump_available())
return;
/* Allocate a page as dumping area for the store status sigps */
page = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
@@ -691,26 +707,20 @@ void __init smp_save_dump_cpus(void)
boot_cpu_addr = stap();
max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev;
for (addr = 0; addr <= max_cpu_addr; addr++) {
+ if (addr == boot_cpu_addr)
+ continue;
if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) ==
SIGP_CC_NOT_OPERATIONAL)
continue;
- is_boot_cpu = (addr == boot_cpu_addr);
- /* Allocate save area */
- sa = save_area_alloc(is_boot_cpu);
+ sa = save_area_alloc(false);
if (!sa)
panic("could not allocate memory for save area\n");
- if (MACHINE_HAS_VX)
- /* Get the vector registers */
- smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page);
- /*
- * For a zfcp/nvme dump OLDMEM_BASE == NULL and the registers
- * of the boot CPU are stored in the HSA. To retrieve
- * these registers an SCLP request is required which is
- * done by drivers/s390/char/zcore.c:init_cpu_info()
- */
- if (!is_boot_cpu || oldmem_data.start)
- /* Get the CPU registers */
- smp_save_cpu_regs(sa, addr, is_boot_cpu, page);
+ __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page));
+ save_area_add_regs(sa, page);
+ if (MACHINE_HAS_VX) {
+ __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(page));
+ save_area_add_vxrs(sa, page);
+ }
}
memblock_free(page, PAGE_SIZE);
diag_amode31_ops.diag308_reset();
@@ -1256,7 +1266,7 @@ static __always_inline void set_new_lowcore(struct lowcore *lc)
: "memory", "cc");
}
-static int __init smp_reinit_ipl_cpu(void)
+int __init smp_reinit_ipl_cpu(void)
{
unsigned long async_stack, nodat_stack, mcck_stack;
struct lowcore *lc, *lc_ipl;
@@ -1281,6 +1291,8 @@ static int __init smp_reinit_ipl_cpu(void)
__ctl_clear_bit(0, 28); /* disable lowcore protection */
S390_lowcore.mcesad = mcesad;
__ctl_load(cr0, 0, 0);
+ if (abs_lowcore_map(0, lc, false))
+ panic("Couldn't remap absolute lowcore");
lowcore_ptr[0] = lc;
local_mcck_enable();
local_irq_restore(flags);
@@ -1291,4 +1303,3 @@ static int __init smp_reinit_ipl_cpu(void)
return 0;
}
-early_initcall(smp_reinit_ipl_cpu);
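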
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index a5425075dd25..f9810d2a267c 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -234,6 +234,32 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr,
return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
}
+/**
+ * should_export_before_import - Determine whether an export is needed
+ * before an import-like operation
+ * @uvcb: the Ultravisor control block of the UVC to be performed
+ * @mm: the mm of the process
+ *
+ * Returns whether an export is needed before every import-like operation.
+ * This is needed for shared pages, which don't trigger a secure storage
+ * exception when accessed from a different guest.
+ *
+ * Although considered as one, the Unpin Page UVC is not an actual import,
+ * so it is not affected.
+ *
+ * An export is also not needed when there is only one protected VM, because the
+ * page cannot belong to the wrong VM in that case (there is no "other VM"
+ * it can belong to).
+ *
+ * Return: true if an export is needed before every import, otherwise false.
+ */
+static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+{
+ if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
+ return false;
+ return atomic_read(&mm->context.protected_count) > 1;
+}
+
/*
* Requests the Ultravisor to make a page accessible to a guest.
* If it's brought in the first time, it will be cleared. If
@@ -277,6 +303,8 @@ again:
lock_page(page);
ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
+ if (should_export_before_import(uvcb, gmap->mm))
+ uv_convert_from_secure(page_to_phys(page));
rc = make_secure_pte(ptep, uaddr, page, uvcb);
pte_unmap_unlock(ptep, ptelock);
unlock_page(page);
@@ -334,6 +362,61 @@ int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
}
EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
+/**
+ * gmap_destroy_page - Destroy a guest page.
+ * @gmap: the gmap of the guest
+ * @gaddr: the guest address to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ */
+int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
+{
+ struct vm_area_struct *vma;
+ unsigned long uaddr;
+ struct page *page;
+ int rc;
+
+ rc = -EFAULT;
+ mmap_read_lock(gmap->mm);
+
+ uaddr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(uaddr))
+ goto out;
+ vma = vma_lookup(gmap->mm, uaddr);
+ if (!vma)
+ goto out;
+ /*
+ * Huge pages should not be able to become secure
+ */
+ if (is_vm_hugetlb_page(vma))
+ goto out;
+
+ rc = 0;
+ /* we take an extra reference here */
+ page = follow_page(vma, uaddr, FOLL_WRITE | FOLL_GET);
+ if (IS_ERR_OR_NULL(page))
+ goto out;
+ rc = uv_destroy_owned_page(page_to_phys(page));
+ /*
+ * Fault handlers can race; it is possible that two CPUs will fault
+ * on the same secure page. One CPU can destroy the page, reboot,
+ * re-enter secure mode and import it, while the second CPU was
+ * stuck at the beginning of the handler. At some point the second
+ * CPU will be able to progress, and it will not be able to destroy
+ * the page. In that case we do not want to terminate the process,
+ * we instead try to export the page.
+ */
+ if (rc)
+ rc = uv_convert_owned_from_secure(page_to_phys(page));
+ put_page(page);
+out:
+ mmap_read_unlock(gmap->mm);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_destroy_page);
+
/*
* To be called with the page locked or with an extra reference! This will
* prevent gmap_make_secure from touching the page concurrently. Having 2
@@ -392,6 +475,54 @@ static ssize_t uv_query_facilities(struct kobject *kobj,
static struct kobj_attribute uv_query_facilities_attr =
__ATTR(facilities, 0444, uv_query_facilities, NULL);
+static ssize_t uv_query_supp_se_hdr_ver(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_ver);
+}
+
+static struct kobj_attribute uv_query_supp_se_hdr_ver_attr =
+ __ATTR(supp_se_hdr_ver, 0444, uv_query_supp_se_hdr_ver, NULL);
+
+static ssize_t uv_query_supp_se_hdr_pcf(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_pcf);
+}
+
+static struct kobj_attribute uv_query_supp_se_hdr_pcf_attr =
+ __ATTR(supp_se_hdr_pcf, 0444, uv_query_supp_se_hdr_pcf, NULL);
+
+static ssize_t uv_query_dump_cpu_len(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return scnprintf(page, PAGE_SIZE, "%lx\n",
+ uv_info.guest_cpu_stor_len);
+}
+
+static struct kobj_attribute uv_query_dump_cpu_len_attr =
+ __ATTR(uv_query_dump_cpu_len, 0444, uv_query_dump_cpu_len, NULL);
+
+static ssize_t uv_query_dump_storage_state_len(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return scnprintf(page, PAGE_SIZE, "%lx\n",
+ uv_info.conf_dump_storage_state_len);
+}
+
+static struct kobj_attribute uv_query_dump_storage_state_len_attr =
+ __ATTR(dump_storage_state_len, 0444, uv_query_dump_storage_state_len, NULL);
+
+static ssize_t uv_query_dump_finalize_len(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return scnprintf(page, PAGE_SIZE, "%lx\n",
+ uv_info.conf_dump_finalize_len);
+}
+
+static struct kobj_attribute uv_query_dump_finalize_len_attr =
+ __ATTR(dump_finalize_len, 0444, uv_query_dump_finalize_len, NULL);
+
static ssize_t uv_query_feature_indications(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -431,12 +562,37 @@ static ssize_t uv_query_max_guest_addr(struct kobject *kobj,
static struct kobj_attribute uv_query_max_guest_addr_attr =
__ATTR(max_address, 0444, uv_query_max_guest_addr, NULL);
+static ssize_t uv_query_supp_att_req_hdr_ver(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.supp_att_req_hdr_ver);
+}
+
+static struct kobj_attribute uv_query_supp_att_req_hdr_ver_attr =
+ __ATTR(supp_att_req_hdr_ver, 0444, uv_query_supp_att_req_hdr_ver, NULL);
+
+static ssize_t uv_query_supp_att_pflags(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.supp_att_pflags);
+}
+
+static struct kobj_attribute uv_query_supp_att_pflags_attr =
+ __ATTR(supp_att_pflags, 0444, uv_query_supp_att_pflags, NULL);
+
static struct attribute *uv_query_attrs[] = {
&uv_query_facilities_attr.attr,
&uv_query_feature_indications_attr.attr,
&uv_query_max_guest_cpus_attr.attr,
&uv_query_max_guest_vms_attr.attr,
&uv_query_max_guest_addr_attr.attr,
+ &uv_query_supp_se_hdr_ver_attr.attr,
+ &uv_query_supp_se_hdr_pcf_attr.attr,
+ &uv_query_dump_storage_state_len_attr.attr,
+ &uv_query_dump_finalize_len_attr.attr,
+ &uv_query_dump_cpu_len_attr.attr,
+ &uv_query_supp_att_req_hdr_ver_attr.attr,
+ &uv_query_supp_att_pflags_attr.attr,
NULL,
};
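
The new attributes are plain read-only sysfs files next to the existing UV query attributes. Assuming they appear under /sys/firmware/uv/query/ like the existing ones, a small user-space sketch that prints the newly added values could look like this:

/* Hedged sketch: dump the new UV query attributes (paths are an assumption). */
#include <stdio.h>

static void print_uv_attr(const char *name)
{
	char path[256], buf[64];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/firmware/uv/query/%s", name);
	f = fopen(path, "r");
	if (!f)
		return;
	if (fgets(buf, sizeof(buf), f))
		printf("%-24s %s", name, buf);
	fclose(f);
}

int main(void)
{
	static const char * const attrs[] = {
		"supp_se_hdr_ver",
		"supp_se_hdr_pcf",
		"uv_query_dump_cpu_len",	/* literal name from the __ATTR() above */
		"dump_storage_state_len",
		"dump_finalize_len",
		"supp_att_req_hdr_ver",
		"supp_att_pflags",
	};
	unsigned int i;

	for (i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++)
		print_uv_attr(attrs[i]);
	return 0;
}
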
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 5075cde77b29..3105ca5bd470 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -69,10 +69,11 @@ static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
struct mm_struct *mm = task->mm;
+ VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *vma;
mmap_read_lock(mm);
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ for_each_vma(vmi, vma) {
unsigned long size = vma->vm_end - vma->vm_start;
if (!vma_is_special_mapping(vma, &vvar_mapping))
@@ -226,7 +227,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned long len)
end -= len;
if (end > start) {
- offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
+ offset = prandom_u32_max(((end - start) >> PAGE_SHIFT) + 1);
addr = start + (offset << PAGE_SHIFT);
} else {
addr = start;
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 2e526f11b91e..5ea3830af0cc 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -131,6 +131,7 @@ SECTIONS
/*
* Table with the patch locations to undo expolines
*/
+ . = ALIGN(4);
.nospec_call_table : {
__nospec_call_start = . ;
*(.s390_indirect*)
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 2e84d3922f7c..33f4ff909476 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -34,6 +34,7 @@ config KVM
select SRCU
select KVM_VFIO
select INTERVAL_TREE
+ select MMU_NOTIFIER
help
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 26f4a74e5ce4..02217fb4ae10 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -10,4 +10,5 @@ ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
+kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 227ed0009354..0243b6e38d36 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -262,77 +262,77 @@ struct aste {
/* .. more fields there */
};
-int ipte_lock_held(struct kvm_vcpu *vcpu)
+int ipte_lock_held(struct kvm *kvm)
{
- if (vcpu->arch.sie_block->eca & ECA_SII) {
+ if (sclp.has_siif) {
int rc;
- read_lock(&vcpu->kvm->arch.sca_lock);
- rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0;
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ read_lock(&kvm->arch.sca_lock);
+ rc = kvm_s390_get_ipte_control(kvm)->kh != 0;
+ read_unlock(&kvm->arch.sca_lock);
return rc;
}
- return vcpu->kvm->arch.ipte_lock_count != 0;
+ return kvm->arch.ipte_lock_count != 0;
}
-static void ipte_lock_simple(struct kvm_vcpu *vcpu)
+static void ipte_lock_simple(struct kvm *kvm)
{
union ipte_control old, new, *ic;
- mutex_lock(&vcpu->kvm->arch.ipte_mutex);
- vcpu->kvm->arch.ipte_lock_count++;
- if (vcpu->kvm->arch.ipte_lock_count > 1)
+ mutex_lock(&kvm->arch.ipte_mutex);
+ kvm->arch.ipte_lock_count++;
+ if (kvm->arch.ipte_lock_count > 1)
goto out;
retry:
- read_lock(&vcpu->kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ read_lock(&kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(kvm);
do {
old = READ_ONCE(*ic);
if (old.k) {
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ read_unlock(&kvm->arch.sca_lock);
cond_resched();
goto retry;
}
new = old;
new.k = 1;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ read_unlock(&kvm->arch.sca_lock);
out:
- mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
+ mutex_unlock(&kvm->arch.ipte_mutex);
}
-static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
+static void ipte_unlock_simple(struct kvm *kvm)
{
union ipte_control old, new, *ic;
- mutex_lock(&vcpu->kvm->arch.ipte_mutex);
- vcpu->kvm->arch.ipte_lock_count--;
- if (vcpu->kvm->arch.ipte_lock_count)
+ mutex_lock(&kvm->arch.ipte_mutex);
+ kvm->arch.ipte_lock_count--;
+ if (kvm->arch.ipte_lock_count)
goto out;
- read_lock(&vcpu->kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ read_lock(&kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(kvm);
do {
old = READ_ONCE(*ic);
new = old;
new.k = 0;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&vcpu->kvm->arch.sca_lock);
- wake_up(&vcpu->kvm->arch.ipte_wq);
+ read_unlock(&kvm->arch.sca_lock);
+ wake_up(&kvm->arch.ipte_wq);
out:
- mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
+ mutex_unlock(&kvm->arch.ipte_mutex);
}
-static void ipte_lock_siif(struct kvm_vcpu *vcpu)
+static void ipte_lock_siif(struct kvm *kvm)
{
union ipte_control old, new, *ic;
retry:
- read_lock(&vcpu->kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ read_lock(&kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(kvm);
do {
old = READ_ONCE(*ic);
if (old.kg) {
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ read_unlock(&kvm->arch.sca_lock);
cond_resched();
goto retry;
}
@@ -340,15 +340,15 @@ retry:
new.k = 1;
new.kh++;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ read_unlock(&kvm->arch.sca_lock);
}
-static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
+static void ipte_unlock_siif(struct kvm *kvm)
{
union ipte_control old, new, *ic;
- read_lock(&vcpu->kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ read_lock(&kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(kvm);
do {
old = READ_ONCE(*ic);
new = old;
@@ -356,25 +356,25 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
if (!new.kh)
new.k = 0;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ read_unlock(&kvm->arch.sca_lock);
if (!new.kh)
- wake_up(&vcpu->kvm->arch.ipte_wq);
+ wake_up(&kvm->arch.ipte_wq);
}
-void ipte_lock(struct kvm_vcpu *vcpu)
+void ipte_lock(struct kvm *kvm)
{
- if (vcpu->arch.sie_block->eca & ECA_SII)
- ipte_lock_siif(vcpu);
+ if (sclp.has_siif)
+ ipte_lock_siif(kvm);
else
- ipte_lock_simple(vcpu);
+ ipte_lock_simple(kvm);
}
-void ipte_unlock(struct kvm_vcpu *vcpu)
+void ipte_unlock(struct kvm *kvm)
{
- if (vcpu->arch.sie_block->eca & ECA_SII)
- ipte_unlock_siif(vcpu);
+ if (sclp.has_siif)
+ ipte_unlock_siif(kvm);
else
- ipte_unlock_simple(vcpu);
+ ipte_unlock_simple(kvm);
}
static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
@@ -489,6 +489,8 @@ enum prot_type {
PROT_TYPE_ALC = 2,
PROT_TYPE_DAT = 3,
PROT_TYPE_IEP = 4,
+ /* Dummy value for passing an initialized value when code != PGM_PROTECTION */
+ PROT_NONE,
};
static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
@@ -504,6 +506,10 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva,
switch (code) {
case PGM_PROTECTION:
switch (prot) {
+ case PROT_NONE:
+ /* We should never get here, acts like termination */
+ WARN_ON_ONCE(1);
+ break;
case PROT_TYPE_IEP:
tec->b61 = 1;
fallthrough;
@@ -968,8 +974,10 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
return rc;
} else {
gpa = kvm_s390_real_to_abs(vcpu, ga);
- if (kvm_is_error_gpa(vcpu->kvm, gpa))
+ if (kvm_is_error_gpa(vcpu->kvm, gpa)) {
rc = PGM_ADDRESSING;
+ prot = PROT_NONE;
+ }
}
if (rc)
return trans_exc(vcpu, rc, ga, ar, mode, prot);
@@ -1086,7 +1094,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
try_storage_prot_override = storage_prot_override_applicable(vcpu);
need_ipte_lock = psw_bits(*psw).dat && !asce.r;
if (need_ipte_lock)
- ipte_lock(vcpu);
+ ipte_lock(vcpu->kvm);
/*
* Since we do the access further down ultimately via a move instruction
* that does key checking and returns an error in case of a protection
@@ -1112,8 +1120,6 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
if (rc == PGM_PROTECTION && try_storage_prot_override)
rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
data, fragment_len, PAGE_SPO_ACC);
- if (rc == PGM_PROTECTION)
- prot = PROT_TYPE_KEYC;
if (rc)
break;
len -= fragment_len;
@@ -1123,11 +1129,15 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
if (rc > 0) {
bool terminate = (mode == GACC_STORE) && (idx > 0);
+ if (rc == PGM_PROTECTION)
+ prot = PROT_TYPE_KEYC;
+ else
+ prot = PROT_NONE;
rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
}
out_unlock:
if (need_ipte_lock)
- ipte_unlock(vcpu);
+ ipte_unlock(vcpu->kvm);
if (nr_pages > ARRAY_SIZE(gpa_array))
vfree(gpas);
return rc;
@@ -1199,10 +1209,10 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
if (rc)
return rc;
- ipte_lock(vcpu);
+ ipte_lock(vcpu->kvm);
rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
access_key);
- ipte_unlock(vcpu);
+ ipte_unlock(vcpu->kvm);
return rc;
}
@@ -1465,7 +1475,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
* tables/pointers we read stay valid - unshadowing is however
* always possible - only guest_table_lock protects us.
*/
- ipte_lock(vcpu);
+ ipte_lock(vcpu->kvm);
rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
if (rc)
@@ -1499,7 +1509,7 @@ shadow_page:
pte.p |= dat_protection;
if (!rc)
rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
- ipte_unlock(vcpu);
+ ipte_unlock(vcpu->kvm);
mmap_read_unlock(sg->mm);
return rc;
}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 1124ff282012..9408d6cc8e2c 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -440,9 +440,9 @@ int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
return access_guest_real(vcpu, gra, data, len, 0);
}
-void ipte_lock(struct kvm_vcpu *vcpu);
-void ipte_unlock(struct kvm_vcpu *vcpu);
-int ipte_lock_held(struct kvm_vcpu *vcpu);
+void ipte_lock(struct kvm *kvm);
+void ipte_unlock(struct kvm *kvm);
+int ipte_lock_held(struct kvm *kvm);
int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
/* MVPG PEI indication bits */
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 8bd42a20d924..88112065d941 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -528,12 +528,27 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
static int handle_pv_notification(struct kvm_vcpu *vcpu)
{
+ int ret;
+
if (vcpu->arch.sie_block->ipa == 0xb210)
return handle_pv_spx(vcpu);
if (vcpu->arch.sie_block->ipa == 0xb220)
return handle_pv_sclp(vcpu);
if (vcpu->arch.sie_block->ipa == 0xb9a4)
return handle_pv_uvc(vcpu);
+ if (vcpu->arch.sie_block->ipa >> 8 == 0xae) {
+ /*
+ * Besides external call, other SIGP orders also cause a
+ * 108 (pv notify) intercept. In contrast to external call,
+ * these orders need to be emulated and hence the appropriate
+ * place to handle them is in handle_instruction().
+ * So first try kvm_s390_handle_sigp_pei() and if that isn't
+ * successful, go on with handle_instruction().
+ */
+ ret = kvm_s390_handle_sigp_pei(vcpu);
+ if (!ret)
+ return ret;
+ }
return handle_instruction(vcpu);
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index af96dc0549a4..ab569faf0df2 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -28,9 +28,11 @@
#include <asm/switch_to.h>
#include <asm/nmi.h>
#include <asm/airq.h>
+#include <asm/tpi.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include "trace-s390.h"
+#include "pci.h"
#define PFAULT_INIT 0x0600
#define PFAULT_DONE 0x0680
@@ -702,7 +704,7 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
/*
* We indicate floating repressible conditions along with
* other pending conditions. Channel Report Pending and Channel
- * Subsystem damage are the only two and and are indicated by
+ * Subsystem damage are the only two and are indicated by
* bits in mcic and masked in cr14.
*/
if (test_and_clear_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
@@ -3311,10 +3313,87 @@ out:
}
EXPORT_SYMBOL_GPL(kvm_s390_gisc_unregister);
-static void gib_alert_irq_handler(struct airq_struct *airq, bool floating)
+static void aen_host_forward(unsigned long si)
{
+ struct kvm_s390_gisa_interrupt *gi;
+ struct zpci_gaite *gaite;
+ struct kvm *kvm;
+
+ gaite = (struct zpci_gaite *)aift->gait +
+ (si * sizeof(struct zpci_gaite));
+ if (gaite->count == 0)
+ return;
+ if (gaite->aisb != 0)
+ set_bit_inv(gaite->aisbo, phys_to_virt(gaite->aisb));
+
+ kvm = kvm_s390_pci_si_to_kvm(aift, si);
+ if (!kvm)
+ return;
+ gi = &kvm->arch.gisa_int;
+
+ if (!(gi->origin->g1.simm & AIS_MODE_MASK(gaite->gisc)) ||
+ !(gi->origin->g1.nimm & AIS_MODE_MASK(gaite->gisc))) {
+ gisa_set_ipm_gisc(gi->origin, gaite->gisc);
+ if (hrtimer_active(&gi->timer))
+ hrtimer_cancel(&gi->timer);
+ hrtimer_start(&gi->timer, 0, HRTIMER_MODE_REL);
+ kvm->stat.aen_forward++;
+ }
+}
+
+static void aen_process_gait(u8 isc)
+{
+ bool found = false, first = true;
+ union zpci_sic_iib iib = {{0}};
+ unsigned long si, flags;
+
+ spin_lock_irqsave(&aift->gait_lock, flags);
+
+ if (!aift->gait) {
+ spin_unlock_irqrestore(&aift->gait_lock, flags);
+ return;
+ }
+
+ for (si = 0;;) {
+ /* Scan adapter summary indicator bit vector */
+ si = airq_iv_scan(aift->sbv, si, airq_iv_end(aift->sbv));
+ if (si == -1UL) {
+ if (first || found) {
+ /* Re-enable interrupts. */
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, isc,
+ &iib);
+ first = found = false;
+ } else {
+ /* Interrupts on and all bits processed */
+ break;
+ }
+ found = false;
+ si = 0;
+ /* Scan again after re-enabling interrupts */
+ continue;
+ }
+ found = true;
+ aen_host_forward(si);
+ }
+
+ spin_unlock_irqrestore(&aift->gait_lock, flags);
+}
+
+static void gib_alert_irq_handler(struct airq_struct *airq,
+ struct tpi_info *tpi_info)
+{
+ struct tpi_adapter_info *info = (struct tpi_adapter_info *)tpi_info;
+
inc_irq_stat(IRQIO_GAL);
- process_gib_alert_list();
+
+ if ((info->forward || info->error) &&
+ IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
+ aen_process_gait(info->isc);
+ if (info->aism != 0)
+ process_gib_alert_list();
+ } else {
+ process_gib_alert_list();
+ }
}
static struct airq_struct gib_alert_irq = {
@@ -3326,6 +3405,11 @@ void kvm_s390_gib_destroy(void)
{
if (!gib)
return;
+ if (kvm_s390_pci_interp_allowed() && aift) {
+ mutex_lock(&aift->aift_lock);
+ kvm_s390_pci_aen_exit();
+ mutex_unlock(&aift->aift_lock);
+ }
chsc_sgib(0);
unregister_adapter_interrupt(&gib_alert_irq);
free_page((unsigned long)gib);
@@ -3363,6 +3447,14 @@ int kvm_s390_gib_init(u8 nisc)
goto out_unreg_gal;
}
+ if (kvm_s390_pci_interp_allowed()) {
+ if (kvm_s390_pci_aen_init(nisc)) {
+ pr_err("Initializing AEN for PCI failed\n");
+ rc = -EIO;
+ goto out_unreg_gal;
+ }
+ }
+
KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc);
goto out;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 8fcb56141689..45d4b8182b07 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -31,6 +31,7 @@
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>
+#include <linux/mmu_notifier.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
@@ -47,6 +48,7 @@
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"
+#include "pci.h"
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -63,7 +65,8 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
STATS_DESC_COUNTER(VM, inject_float_mchk),
STATS_DESC_COUNTER(VM, inject_pfault_done),
STATS_DESC_COUNTER(VM, inject_service_signal),
- STATS_DESC_COUNTER(VM, inject_virtio)
+ STATS_DESC_COUNTER(VM, inject_virtio),
+ STATS_DESC_COUNTER(VM, aen_forward)
};
const struct kvm_stats_header kvm_vm_stats_header = {
@@ -502,6 +505,14 @@ int kvm_arch_init(void *opaque)
goto out;
}
+ if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
+ rc = kvm_s390_pci_init();
+ if (rc) {
+ pr_err("Unable to allocate AIFT for PCI\n");
+ goto out;
+ }
+ }
+
rc = kvm_s390_gib_init(GAL_ISC);
if (rc)
goto out;
@@ -516,6 +527,8 @@ out:
void kvm_arch_exit(void)
{
kvm_s390_gib_destroy();
+ if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
+ kvm_s390_pci_exit();
debug_unregister(kvm_s390_dbf);
debug_unregister(kvm_s390_dbf_uv);
}
@@ -606,6 +619,32 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_PROTECTED:
r = is_prot_virt_host();
break;
+ case KVM_CAP_S390_PROTECTED_DUMP: {
+ u64 pv_cmds_dump[] = {
+ BIT_UVC_CMD_DUMP_INIT,
+ BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
+ BIT_UVC_CMD_DUMP_CPU,
+ BIT_UVC_CMD_DUMP_COMPLETE,
+ };
+ int i;
+
+ r = is_prot_virt_host();
+
+ for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
+ if (!test_bit_inv(pv_cmds_dump[i],
+ (unsigned long *)&uv_info.inst_calls_list)) {
+ r = 0;
+ break;
+ }
+ }
+ break;
+ }
+ case KVM_CAP_S390_ZPCI_OP:
+ r = kvm_s390_pci_interp_allowed();
+ break;
+ case KVM_CAP_S390_CPU_TOPOLOGY:
+ r = test_facility(11);
+ break;
default:
r = 0;
}
@@ -817,6 +856,20 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
icpt_operexc_on_all_vcpus(kvm);
r = 0;
break;
+ case KVM_CAP_S390_CPU_TOPOLOGY:
+ r = -EINVAL;
+ mutex_lock(&kvm->lock);
+ if (kvm->created_vcpus) {
+ r = -EBUSY;
+ } else if (test_facility(11)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 11);
+ set_kvm_facility(kvm->arch.model.fac_list, 11);
+ r = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
+ r ? "(not available)" : "(success)");
+ break;
default:
r = -EINVAL;
break;
@@ -1019,6 +1072,42 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
return 0;
}
+static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
+{
+ /* Only set the ECB bits after guest requests zPCI interpretation */
+ if (!vcpu->kvm->arch.use_zpci_interp)
+ return;
+
+ vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
+ vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
+}
+
+void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ unsigned long i;
+
+ lockdep_assert_held(&kvm->lock);
+
+ if (!kvm_s390_pci_interp_allowed())
+ return;
+
+ /*
+ * If the host is configured for PCI and the necessary facilities are
+ * available, turn on interpretation for the life of this guest.
+ */
+ kvm->arch.use_zpci_interp = 1;
+
+ kvm_s390_vcpu_block_all(kvm);
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ kvm_s390_vcpu_pci_setup(vcpu);
+ kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
+ }
+
+ kvm_s390_vcpu_unblock_all(kvm);
+}
+
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
unsigned long cx;
@@ -1691,6 +1780,57 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
return ret;
}
+/**
+ * kvm_s390_update_topology_change_report - update CPU topology change report
+ * @kvm: guest KVM description
+ * @val: set or clear the MTCR bit
+ *
+ * Updates the Multiprocessor Topology-Change-Report bit to signal
+ * the guest with a topology change.
+ * This is only relevant if the topology facility is present.
+ *
+ * The SCA version, bsca or esca, doesn't matter as offset is the same.
+ */
+static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
+{
+ union sca_utility new, old;
+ struct bsca_block *sca;
+
+ read_lock(&kvm->arch.sca_lock);
+ sca = kvm->arch.sca;
+ do {
+ old = READ_ONCE(sca->utility);
+ new = old;
+ new.mtcr = val;
+ } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
+ read_unlock(&kvm->arch.sca_lock);
+}
+
+static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ if (!test_kvm_facility(kvm, 11))
+ return -ENXIO;
+
+ kvm_s390_update_topology_change_report(kvm, !!attr->attr);
+ return 0;
+}
+
+static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ u8 topo;
+
+ if (!test_kvm_facility(kvm, 11))
+ return -ENXIO;
+
+ read_lock(&kvm->arch.sca_lock);
+ topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
+ read_unlock(&kvm->arch.sca_lock);
+
+ return put_user(topo, (u8 __user *)attr->addr);
+}
+
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
int ret;
@@ -1711,6 +1851,9 @@ static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_MIGRATION:
ret = kvm_s390_vm_set_migration(kvm, attr);
break;
+ case KVM_S390_VM_CPU_TOPOLOGY:
+ ret = kvm_s390_set_topo_change_indication(kvm, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -1736,6 +1879,9 @@ static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_MIGRATION:
ret = kvm_s390_vm_get_migration(kvm, attr);
break;
+ case KVM_S390_VM_CPU_TOPOLOGY:
+ ret = kvm_s390_get_topo_change_indication(kvm, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -1809,6 +1955,9 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_MIGRATION:
ret = 0;
break;
+ case KVM_S390_VM_CPU_TOPOLOGY:
+ ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
+ break;
default:
ret = -ENXIO;
break;
@@ -2166,12 +2315,25 @@ out:
return r;
}
-static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
+/**
+ * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
+ * non-protected.
+ * @kvm: the VM whose protected vCPUs are to be converted
+ * @rc: return value for the RC field of the UVC (in case of error)
+ * @rrc: return value for the RRC field of the UVC (in case of error)
+ *
+ * Does not stop in case of error, tries to convert as many
+ * CPUs as possible. In case of error, the RC and RRC of the last error are
+ * returned.
+ *
+ * Return: 0 in case of success, otherwise -EIO
+ */
+int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
{
struct kvm_vcpu *vcpu;
- u16 rc, rrc;
- int ret = 0;
unsigned long i;
+ u16 _rc, _rrc;
+ int ret = 0;
/*
* We ignore failures and try to destroy as many CPUs as possible.
@@ -2183,9 +2345,9 @@ static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
*/
kvm_for_each_vcpu(i, vcpu, kvm) {
mutex_lock(&vcpu->mutex);
- if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
- *rcp = rc;
- *rrcp = rrc;
+ if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
+ *rc = _rc;
+ *rrc = _rrc;
ret = -EIO;
}
mutex_unlock(&vcpu->mutex);
@@ -2196,6 +2358,17 @@ static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
return ret;
}
+/**
+ * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
+ * to protected.
+ * @kvm: the VM whose protected vCPUs are to be converted
+ * @rc: return value for the RC field of the UVC (in case of error)
+ * @rrc: return value for the RRC field of the UVC (in case of error)
+ *
+ * Tries to undo the conversion in case of error.
+ *
+ * Return: 0 in case of success, otherwise -EIO
+ */
static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
{
unsigned long i;
@@ -2220,6 +2393,115 @@ static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
return r;
}
+/*
+ * Here we provide user space with a direct interface to query UV
+ * related data like UV maxima and available features as well as
+ * feature specific data.
+ *
+ * To facilitate future extension of the data structures we'll try to
+ * write data up to the maximum requested length.
+ */
+static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
+{
+ ssize_t len_min;
+
+ switch (info->header.id) {
+ case KVM_PV_INFO_VM: {
+ len_min = sizeof(info->header) + sizeof(info->vm);
+
+ if (info->header.len_max < len_min)
+ return -EINVAL;
+
+ memcpy(info->vm.inst_calls_list,
+ uv_info.inst_calls_list,
+ sizeof(uv_info.inst_calls_list));
+
+ /* It's the max cpuid, not the max number of cpus, so it's off by one */
+ info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
+ info->vm.max_guests = uv_info.max_num_sec_conf;
+ info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
+ info->vm.feature_indication = uv_info.uv_feature_indications;
+
+ return len_min;
+ }
+ case KVM_PV_INFO_DUMP: {
+ len_min = sizeof(info->header) + sizeof(info->dump);
+
+ if (info->header.len_max < len_min)
+ return -EINVAL;
+
+ info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
+ info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
+ info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
+ return len_min;
+ }
+ default:
+ return -EINVAL;
+ }
+}
+
+static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
+ struct kvm_s390_pv_dmp dmp)
+{
+ int r = -EINVAL;
+ void __user *result_buff = (void __user *)dmp.buff_addr;
+
+ switch (dmp.subcmd) {
+ case KVM_PV_DUMP_INIT: {
+ if (kvm->arch.pv.dumping)
+ break;
+
+ /*
+ * Block SIE entry as concurrent dump UVCs could lead
+ * to validity intercepts.
+ */
+ kvm_s390_vcpu_block_all(kvm);
+
+ r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
+ cmd->rc, cmd->rrc);
+ if (!r) {
+ kvm->arch.pv.dumping = true;
+ } else {
+ kvm_s390_vcpu_unblock_all(kvm);
+ r = -EINVAL;
+ }
+ break;
+ }
+ case KVM_PV_DUMP_CONFIG_STOR_STATE: {
+ if (!kvm->arch.pv.dumping)
+ break;
+
+ /*
+ * gaddr is an output parameter since we might stop
+ * early. As dmp will be copied back in our caller, we
+ * don't need to do it ourselves.
+ */
+ r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
+ &cmd->rc, &cmd->rrc);
+ break;
+ }
+ case KVM_PV_DUMP_COMPLETE: {
+ if (!kvm->arch.pv.dumping)
+ break;
+
+ r = -EINVAL;
+ if (dmp.buff_len < uv_info.conf_dump_finalize_len)
+ break;
+
+ r = kvm_s390_pv_dump_complete(kvm, result_buff,
+ &cmd->rc, &cmd->rrc);
+ break;
+ }
+ default:
+ r = -ENOTTY;
+ break;
+ }
+
+ return r;
+}
+
static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
{
int r = 0;
@@ -2356,6 +2638,68 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
cmd->rc, cmd->rrc);
break;
}
+ case KVM_PV_INFO: {
+ struct kvm_s390_pv_info info = {};
+ ssize_t data_len;
+
+ /*
+ * No need to check the VM protection here.
+ *
+ * Maybe user space wants to query some of the data
+ * when the VM is still unprotected. If we see the
+ * need to fence a new data command we can still
+ * return an error in the info handler.
+ */
+
+ r = -EFAULT;
+ if (copy_from_user(&info, argp, sizeof(info.header)))
+ break;
+
+ r = -EINVAL;
+ if (info.header.len_max < sizeof(info.header))
+ break;
+
+ data_len = kvm_s390_handle_pv_info(&info);
+ if (data_len < 0) {
+ r = data_len;
+ break;
+ }
+ /*
+ * If a data command struct is extended (multiple
+ * times) this can be used to determine how much of it
+ * is valid.
+ */
+ info.header.len_written = data_len;
+
+ r = -EFAULT;
+ if (copy_to_user(argp, &info, data_len))
+ break;
+
+ r = 0;
+ break;
+ }
+ case KVM_PV_DUMP: {
+ struct kvm_s390_pv_dmp dmp;
+
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(&dmp, argp, sizeof(dmp)))
+ break;
+
+ r = kvm_s390_pv_dmp(kvm, cmd, dmp);
+ if (r)
+ break;
+
+ if (copy_to_user(argp, &dmp, sizeof(dmp))) {
+ r = -EFAULT;
+ break;
+ }
+
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -2581,6 +2925,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = -EFAULT;
break;
}
+ case KVM_S390_ZPCI_OP: {
+ struct kvm_s390_zpci_op args;
+
+ r = -EINVAL;
+ if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
+ break;
+ if (copy_from_user(&args, argp, sizeof(args))) {
+ r = -EFAULT;
+ break;
+ }
+ r = kvm_s390_pci_zpci_op(kvm, &args);
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -2742,6 +3099,14 @@ static void sca_dispose(struct kvm *kvm)
kvm->arch.sca = NULL;
}
+void kvm_arch_free_vm(struct kvm *kvm)
+{
+ if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
+ kvm_s390_pci_clear_list(kvm);
+
+ __kvm_arch_free_vm(kvm);
+}
+
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
@@ -2824,6 +3189,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_s390_crypto_init(kvm);
+ if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
+ mutex_lock(&kvm->lock);
+ kvm_s390_pci_init_list(kvm);
+ kvm_s390_vcpu_pci_enable_interp(kvm);
+ mutex_unlock(&kvm->lock);
+ }
+
mutex_init(&kvm->arch.float_int.ais_lock);
spin_lock_init(&kvm->arch.float_int.lock);
for (i = 0; i < FIRQ_LIST_COUNT; i++)
@@ -2877,6 +3249,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_clear_async_pf_completion_queue(vcpu);
if (!kvm_is_ucontrol(vcpu->kvm))
sca_del_vcpu(vcpu);
+ kvm_s390_update_topology_change_report(vcpu->kvm, 1);
if (kvm_is_ucontrol(vcpu->kvm))
gmap_remove(vcpu->arch.gmap);
@@ -2904,6 +3277,15 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
*/
if (kvm_s390_pv_get_handle(kvm))
kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
+ /*
+ * Remove the mmu notifier only when the whole KVM VM is torn down,
+ * and only if one was registered to begin with. If the VM is
+ * currently not protected, but has previously been protected,
+ * then it's possible that the notifier is still registered.
+ */
+ if (kvm->arch.pv.mmu_notifier.ops)
+ mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
+
debug_unregister(kvm->arch.dbf);
free_page((unsigned long)kvm->arch.sie_page2);
if (!kvm_is_ucontrol(kvm))
@@ -3047,9 +3429,7 @@ static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
if (!sclp.has_esca || !sclp.has_64bscao)
return false;
- mutex_lock(&kvm->lock);
rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
- mutex_unlock(&kvm->lock);
return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
@@ -3272,6 +3652,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
if (test_kvm_facility(vcpu->kvm, 9))
vcpu->arch.sie_block->ecb |= ECB_SRSI;
+ if (test_kvm_facility(vcpu->kvm, 11))
+ vcpu->arch.sie_block->ecb |= ECB_PTF;
if (test_kvm_facility(vcpu->kvm, 73))
vcpu->arch.sie_block->ecb |= ECB_TE;
if (!kvm_is_ucontrol(vcpu->kvm))
@@ -3324,6 +3706,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_s390_vcpu_crypto_setup(vcpu);
+ kvm_s390_vcpu_pci_setup(vcpu);
+
mutex_lock(&vcpu->kvm->lock);
if (kvm_s390_pv_is_protected(vcpu->kvm)) {
rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
@@ -3403,6 +3787,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
rc = kvm_s390_vcpu_setup(vcpu);
if (rc)
goto out_ucontrol_uninit;
+
+ kvm_s390_update_topology_change_report(vcpu->kvm, 1);
return 0;
out_ucontrol_uninit:
@@ -3957,8 +4343,6 @@ retry:
goto retry;
}
- /* nothing to do, just clear the request */
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
/* we left the vsie handler, nothing to do, just clear the request */
kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
@@ -4473,6 +4857,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
struct kvm_run *kvm_run = vcpu->run;
int rc;
+ /*
+ * Running a VM while dumping always has the potential to
+ * produce inconsistent dump data. But for PV vcpus a SIE
+ * entry while dumping could also lead to a fatal validity
+ * intercept which we absolutely want to avoid.
+ */
+ if (vcpu->kvm->arch.pv.dumping)
+ return -EINVAL;
+
if (kvm_run->immediate_exit)
return -EINTR;
@@ -4912,6 +5305,48 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
return -ENOIOCTLCMD;
}
+static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
+ struct kvm_pv_cmd *cmd)
+{
+ struct kvm_s390_pv_dmp dmp;
+ void *data;
+ int ret;
+
+ /* Dump initialization is a prerequisite */
+ if (!vcpu->kvm->arch.pv.dumping)
+ return -EINVAL;
+
+ if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
+ return -EFAULT;
+
+ /* We only handle this subcmd right now */
+ if (dmp.subcmd != KVM_PV_DUMP_CPU)
+ return -EINVAL;
+
+ /* The CPU dump length must match the storage donated at cpu creation. */
+ if (dmp.buff_len != uv_info.guest_cpu_stor_len)
+ return -EINVAL;
+
+ data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
+
+ VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
+ vcpu->vcpu_id, cmd->rc, cmd->rrc);
+
+ if (ret)
+ ret = -EINVAL;
+
+ /* On success copy over the dump data */
+ if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
+ ret = -EFAULT;
+
+ kvfree(data);
+ return ret;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -5076,6 +5511,33 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
irq_state.len);
break;
}
+ case KVM_S390_PV_CPU_COMMAND: {
+ struct kvm_pv_cmd cmd;
+
+ r = -EINVAL;
+ if (!is_prot_virt_host())
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(&cmd, argp, sizeof(cmd)))
+ break;
+
+ r = -EINVAL;
+ if (cmd.flags)
+ break;
+
+ /* We only handle this cmd right now */
+ if (cmd.cmd != KVM_PV_DUMP)
+ break;
+
+ r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
+
+ /* Always copy over UV rc / rrc data */
+ if (copy_to_user((__u8 __user *)argp, &cmd.rc,
+ sizeof(cmd.rc) + sizeof(cmd.rrc)))
+ r = -EFAULT;
+ break;
+ }
default:
r = -ENOTTY;
}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 497d52a83c78..f6fd668f887e 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -250,6 +250,11 @@ int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
unsigned long tweak, u16 *rc, u16 *rrc);
int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state);
+int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc);
+int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
+ u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc);
+int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
+ u16 *rc, u16 *rrc);
static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
{
@@ -374,6 +379,7 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
+int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc);
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
@@ -508,6 +514,16 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm);
/**
+ * kvm_s390_vcpu_pci_enable_interp
+ *
+ * Set the associated PCI attributes for each vcpu to allow for zPCI Load/Store
+ * interpretation as well as adapter interruption forwarding.
+ *
+ * @kvm: the KVM guest
+ */
+void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm);
+
+/**
* diag9c_forwarding_hz
*
* Set the maximum number of diag9c forwarding per second
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
new file mode 100644
index 000000000000..c50c1645c0ae
--- /dev/null
+++ b/arch/s390/kvm/pci.c
@@ -0,0 +1,702 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * s390 kvm PCI passthrough support
+ *
+ * Copyright IBM Corp. 2022
+ *
+ * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <asm/pci.h>
+#include <asm/pci_insn.h>
+#include <asm/pci_io.h>
+#include <asm/sclp.h>
+#include "pci.h"
+#include "kvm-s390.h"
+
+struct zpci_aift *aift;
+
+static inline int __set_irq_noiib(u16 ctl, u8 isc)
+{
+ union zpci_sic_iib iib = {{0}};
+
+ return zpci_set_irq_ctrl(ctl, isc, &iib);
+}
+
+void kvm_s390_pci_aen_exit(void)
+{
+ unsigned long flags;
+ struct kvm_zdev **gait_kzdev;
+
+ lockdep_assert_held(&aift->aift_lock);
+
+ /*
+ * Contents of the aipb remain registered for the life of the host
+ * kernel; the information is preserved in zpci_aipb and zpci_aif_sbv
+ * in case the KVM module is inserted again later. Clear the AIFT
+ * information and free anything not registered with underlying
+ * firmware.
+ */
+ spin_lock_irqsave(&aift->gait_lock, flags);
+ gait_kzdev = aift->kzdev;
+ aift->gait = NULL;
+ aift->sbv = NULL;
+ aift->kzdev = NULL;
+ spin_unlock_irqrestore(&aift->gait_lock, flags);
+
+ kfree(gait_kzdev);
+}
+
+static int zpci_setup_aipb(u8 nisc)
+{
+ struct page *page;
+ int size, rc;
+
+ zpci_aipb = kzalloc(sizeof(union zpci_sic_iib), GFP_KERNEL);
+ if (!zpci_aipb)
+ return -ENOMEM;
+
+ aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL);
+ if (!aift->sbv) {
+ rc = -ENOMEM;
+ goto free_aipb;
+ }
+ zpci_aif_sbv = aift->sbv;
+ size = get_order(PAGE_ALIGN(ZPCI_NR_DEVICES *
+ sizeof(struct zpci_gaite)));
+ page = alloc_pages(GFP_KERNEL | __GFP_ZERO, size);
+ if (!page) {
+ rc = -ENOMEM;
+ goto free_sbv;
+ }
+ aift->gait = (struct zpci_gaite *)page_to_virt(page);
+
+ zpci_aipb->aipb.faisb = virt_to_phys(aift->sbv->vector);
+ zpci_aipb->aipb.gait = virt_to_phys(aift->gait);
+ zpci_aipb->aipb.afi = nisc;
+ zpci_aipb->aipb.faal = ZPCI_NR_DEVICES;
+
+ /* Setup Adapter Event Notification Interpretation */
+ if (zpci_set_irq_ctrl(SIC_SET_AENI_CONTROLS, 0, zpci_aipb)) {
+ rc = -EIO;
+ goto free_gait;
+ }
+
+ return 0;
+
+free_gait:
+ free_pages((unsigned long)aift->gait, size);
+free_sbv:
+ airq_iv_release(aift->sbv);
+ zpci_aif_sbv = NULL;
+free_aipb:
+ kfree(zpci_aipb);
+ zpci_aipb = NULL;
+
+ return rc;
+}
+
+static int zpci_reset_aipb(u8 nisc)
+{
+ /*
+ * AEN registration can only happen once per system boot. If
+ * an aipb already exists then AEN was already registered and
+ * we can re-use the aipb contents. This can only happen if
+ * the KVM module was removed and re-inserted. However, we must
+ * ensure that the same forwarding ISC is used as this is assigned
+ * during KVM module load.
+ */
+ if (zpci_aipb->aipb.afi != nisc)
+ return -EINVAL;
+
+ aift->sbv = zpci_aif_sbv;
+ aift->gait = (struct zpci_gaite *)zpci_aipb->aipb.gait;
+
+ return 0;
+}
+
+int kvm_s390_pci_aen_init(u8 nisc)
+{
+ int rc = 0;
+
+ /* If already enabled for AEN, bail out now */
+ if (aift->gait || aift->sbv)
+ return -EPERM;
+
+ mutex_lock(&aift->aift_lock);
+ aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev),
+ GFP_KERNEL);
+ if (!aift->kzdev) {
+ rc = -ENOMEM;
+ goto unlock;
+ }
+
+ if (!zpci_aipb)
+ rc = zpci_setup_aipb(nisc);
+ else
+ rc = zpci_reset_aipb(nisc);
+ if (rc)
+ goto free_zdev;
+
+ /* Enable floating IRQs */
+ if (__set_irq_noiib(SIC_IRQ_MODE_SINGLE, nisc)) {
+ rc = -EIO;
+ kvm_s390_pci_aen_exit();
+ }
+
+ goto unlock;
+
+free_zdev:
+ kfree(aift->kzdev);
+unlock:
+ mutex_unlock(&aift->aift_lock);
+ return rc;
+}
+
+/* Modify PCI: Register floating adapter interruption forwarding */
+static int kvm_zpci_set_airq(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
+ struct zpci_fib fib = {};
+ u8 status;
+
+ fib.fmt0.isc = zdev->kzdev->fib.fmt0.isc;
+ fib.fmt0.sum = 1; /* enable summary notifications */
+ fib.fmt0.noi = airq_iv_end(zdev->aibv);
+ fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
+ fib.fmt0.aibvo = 0;
+ fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
+ fib.fmt0.aisbo = zdev->aisb & 63;
+ fib.gd = zdev->gisa;
+
+ return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
+}
+
+/* Modify PCI: Unregister floating adapter interruption forwarding */
+static int kvm_zpci_clear_airq(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
+ struct zpci_fib fib = {};
+ u8 cc, status;
+
+ fib.gd = zdev->gisa;
+
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc == 3 || (cc == 1 && status == 24))
+ /* Function already gone or IRQs already deregistered. */
+ cc = 0;
+
+ return cc ? -EIO : 0;
+}
+
+static inline void unaccount_mem(unsigned long nr_pages)
+{
+ struct user_struct *user = get_uid(current_user());
+
+ if (user)
+ atomic_long_sub(nr_pages, &user->locked_vm);
+ if (current->mm)
+ atomic64_sub(nr_pages, &current->mm->pinned_vm);
+}
+
+static inline int account_mem(unsigned long nr_pages)
+{
+ struct user_struct *user = get_uid(current_user());
+ unsigned long page_limit, cur_pages, new_pages;
+
+ page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+ do {
+ cur_pages = atomic_long_read(&user->locked_vm);
+ new_pages = cur_pages + nr_pages;
+ if (new_pages > page_limit)
+ return -ENOMEM;
+ } while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
+ new_pages) != cur_pages);
+
+ atomic64_add(nr_pages, &current->mm->pinned_vm);
+
+ return 0;
+}
+
+static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
+ bool assist)
+{
+ struct page *pages[1], *aibv_page, *aisb_page = NULL;
+ unsigned int msi_vecs, idx;
+ struct zpci_gaite *gaite;
+ unsigned long hva, bit;
+ struct kvm *kvm;
+ phys_addr_t gaddr;
+ int rc = 0, gisc, npages, pcount = 0;
+
+ /*
+ * Interrupt forwarding is only applicable if the device is already
+ * enabled for interpretation
+ */
+ if (zdev->gisa == 0)
+ return -EINVAL;
+
+ kvm = zdev->kzdev->kvm;
+ msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi);
+
+ /* Get the associated forwarding ISC - if invalid, return the error */
+ gisc = kvm_s390_gisc_register(kvm, fib->fmt0.isc);
+ if (gisc < 0)
+ return gisc;
+
+ /* Replace AIBV address */
+ idx = srcu_read_lock(&kvm->srcu);
+ hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv));
+ npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, pages);
+ srcu_read_unlock(&kvm->srcu, idx);
+ if (npages < 1) {
+ rc = -EIO;
+ goto out;
+ }
+ aibv_page = pages[0];
+ pcount++;
+ gaddr = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK);
+ fib->fmt0.aibv = gaddr;
+
+ /* Pin the guest AISB if one was specified */
+ if (fib->fmt0.sum == 1) {
+ idx = srcu_read_lock(&kvm->srcu);
+ hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb));
+ npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM,
+ pages);
+ srcu_read_unlock(&kvm->srcu, idx);
+ if (npages < 1) {
+ rc = -EIO;
+ goto unpin1;
+ }
+ aisb_page = pages[0];
+ pcount++;
+ }
+
+ /* Account for pinned pages, roll back on failure */
+ if (account_mem(pcount))
+ goto unpin2;
+
+ /* AISB must be allocated before we can fill in GAITE */
+ mutex_lock(&aift->aift_lock);
+ bit = airq_iv_alloc_bit(aift->sbv);
+ if (bit == -1UL)
+ goto unlock;
+ zdev->aisb = bit; /* store the summary bit number */
+ zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA |
+ AIRQ_IV_BITLOCK |
+ AIRQ_IV_GUESTVEC,
+ phys_to_virt(fib->fmt0.aibv));
+
+ spin_lock_irq(&aift->gait_lock);
+ gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
+ sizeof(struct zpci_gaite));
+
+ /* If assist not requested, host will get all alerts */
+ if (assist)
+ gaite->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
+ else
+ gaite->gisa = 0;
+
+ gaite->gisc = fib->fmt0.isc;
+ gaite->count++;
+ gaite->aisbo = fib->fmt0.aisbo;
+ gaite->aisb = virt_to_phys(page_address(aisb_page) + (fib->fmt0.aisb &
+ ~PAGE_MASK));
+ aift->kzdev[zdev->aisb] = zdev->kzdev;
+ spin_unlock_irq(&aift->gait_lock);
+
+ /* Update guest FIB for re-issue */
+ fib->fmt0.aisbo = zdev->aisb & 63;
+ fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
+ fib->fmt0.isc = gisc;
+
+ /* Save some guest fib values in the host for later use */
+ zdev->kzdev->fib.fmt0.isc = fib->fmt0.isc;
+ zdev->kzdev->fib.fmt0.aibv = fib->fmt0.aibv;
+ mutex_unlock(&aift->aift_lock);
+
+ /* Issue the clp to set up the irq now */
+ rc = kvm_zpci_set_airq(zdev);
+ return rc;
+
+unlock:
+ mutex_unlock(&aift->aift_lock);
+unpin2:
+ if (fib->fmt0.sum == 1)
+ unpin_user_page(aisb_page);
+unpin1:
+ unpin_user_page(aibv_page);
+out:
+ return rc;
+}
+
+static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force)
+{
+ struct kvm_zdev *kzdev = zdev->kzdev;
+ struct zpci_gaite *gaite;
+ struct page *vpage = NULL, *spage = NULL;
+ int rc, pcount = 0;
+ u8 isc;
+
+ if (zdev->gisa == 0)
+ return -EINVAL;
+
+ mutex_lock(&aift->aift_lock);
+
+ /*
+ * If the clear fails due to an error, leave now unless we know this
+ * device is about to go away (force); in that case clear the GAITE
+ * regardless.
+ */
+ rc = kvm_zpci_clear_airq(zdev);
+ if (rc && !force)
+ goto out;
+
+ if (zdev->kzdev->fib.fmt0.aibv == 0)
+ goto out;
+ spin_lock_irq(&aift->gait_lock);
+ gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
+ sizeof(struct zpci_gaite));
+ isc = gaite->gisc;
+ gaite->count--;
+ if (gaite->count == 0) {
+ /* Release guest AIBV and AISB */
+ vpage = phys_to_page(kzdev->fib.fmt0.aibv);
+ if (gaite->aisb != 0)
+ spage = phys_to_page(gaite->aisb);
+ /* Clear the GAIT entry */
+ gaite->aisb = 0;
+ gaite->gisc = 0;
+ gaite->aisbo = 0;
+ gaite->gisa = 0;
+ aift->kzdev[zdev->aisb] = NULL;
+ /* Clear zdev info */
+ airq_iv_free_bit(aift->sbv, zdev->aisb);
+ airq_iv_release(zdev->aibv);
+ zdev->aisb = 0;
+ zdev->aibv = NULL;
+ }
+ spin_unlock_irq(&aift->gait_lock);
+ kvm_s390_gisc_unregister(kzdev->kvm, isc);
+ kzdev->fib.fmt0.isc = 0;
+ kzdev->fib.fmt0.aibv = 0;
+
+ if (vpage) {
+ unpin_user_page(vpage);
+ pcount++;
+ }
+ if (spage) {
+ unpin_user_page(spage);
+ pcount++;
+ }
+ if (pcount > 0)
+ unaccount_mem(pcount);
+out:
+ mutex_unlock(&aift->aift_lock);
+
+ return rc;
+}
+
+static int kvm_s390_pci_dev_open(struct zpci_dev *zdev)
+{
+ struct kvm_zdev *kzdev;
+
+ kzdev = kzalloc(sizeof(struct kvm_zdev), GFP_KERNEL);
+ if (!kzdev)
+ return -ENOMEM;
+
+ kzdev->zdev = zdev;
+ zdev->kzdev = kzdev;
+
+ return 0;
+}
+
+static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
+{
+ struct kvm_zdev *kzdev;
+
+ kzdev = zdev->kzdev;
+ WARN_ON(kzdev->zdev != zdev);
+ zdev->kzdev = NULL;
+ kfree(kzdev);
+}
+
+
+/*
+ * Register device with the specified KVM. If interpretation facilities are
+ * available, enable them and let userspace indicate whether or not they will
+ * be used (specify SHM bit to disable).
+ */
+static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
+{
+ struct zpci_dev *zdev = opaque;
+ int rc;
+
+ if (!zdev)
+ return -EINVAL;
+
+ mutex_lock(&zdev->kzdev_lock);
+
+ if (zdev->kzdev || zdev->gisa != 0 || !kvm) {
+ mutex_unlock(&zdev->kzdev_lock);
+ return -EINVAL;
+ }
+
+ kvm_get_kvm(kvm);
+
+ mutex_lock(&kvm->lock);
+
+ rc = kvm_s390_pci_dev_open(zdev);
+ if (rc)
+ goto err;
+
+ /*
+ * If interpretation facilities aren't available, add the device to
+ * the kzdev list but don't enable for interpretation.
+ */
+ if (!kvm_s390_pci_interp_allowed())
+ goto out;
+
+ /*
+ * If this is the first request to use an interpreted device, make the
+ * necessary vcpu changes
+ */
+ if (!kvm->arch.use_zpci_interp)
+ kvm_s390_vcpu_pci_enable_interp(kvm);
+
+ if (zdev_enabled(zdev)) {
+ rc = zpci_disable_device(zdev);
+ if (rc)
+ goto err;
+ }
+
+ /*
+ * Store the identity of the kvm guest allowed to access this device
+ * via interpretation, for use by the host CLP
+ */
+ zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
+
+ rc = zpci_enable_device(zdev);
+ if (rc)
+ goto clear_gisa;
+
+ /* Re-register the IOMMU that was already created */
+ rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+ virt_to_phys(zdev->dma_table));
+ if (rc)
+ goto clear_gisa;
+
+out:
+ zdev->kzdev->kvm = kvm;
+
+ spin_lock(&kvm->arch.kzdev_list_lock);
+ list_add_tail(&zdev->kzdev->entry, &kvm->arch.kzdev_list);
+ spin_unlock(&kvm->arch.kzdev_list_lock);
+
+ mutex_unlock(&kvm->lock);
+ mutex_unlock(&zdev->kzdev_lock);
+ return 0;
+
+clear_gisa:
+ zdev->gisa = 0;
+err:
+ if (zdev->kzdev)
+ kvm_s390_pci_dev_release(zdev);
+ mutex_unlock(&kvm->lock);
+ mutex_unlock(&zdev->kzdev_lock);
+ kvm_put_kvm(kvm);
+ return rc;
+}
+
+static void kvm_s390_pci_unregister_kvm(void *opaque)
+{
+ struct zpci_dev *zdev = opaque;
+ struct kvm *kvm;
+
+ if (!zdev)
+ return;
+
+ mutex_lock(&zdev->kzdev_lock);
+
+ if (WARN_ON(!zdev->kzdev)) {
+ mutex_unlock(&zdev->kzdev_lock);
+ return;
+ }
+
+ kvm = zdev->kzdev->kvm;
+ mutex_lock(&kvm->lock);
+
+ /*
+ * A 0 gisa means interpretation was never enabled, just remove the
+ * device from the list.
+ */
+ if (zdev->gisa == 0)
+ goto out;
+
+ /* Forwarding must be turned off before interpretation */
+ if (zdev->kzdev->fib.fmt0.aibv != 0)
+ kvm_s390_pci_aif_disable(zdev, true);
+
+ /* Remove the host CLP guest designation */
+ zdev->gisa = 0;
+
+ if (zdev_enabled(zdev)) {
+ if (zpci_disable_device(zdev))
+ goto out;
+ }
+
+ if (zpci_enable_device(zdev))
+ goto out;
+
+ /* Re-register the IOMMU that was already created */
+ zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+ virt_to_phys(zdev->dma_table));
+
+out:
+ spin_lock(&kvm->arch.kzdev_list_lock);
+ list_del(&zdev->kzdev->entry);
+ spin_unlock(&kvm->arch.kzdev_list_lock);
+ kvm_s390_pci_dev_release(zdev);
+
+ mutex_unlock(&kvm->lock);
+ mutex_unlock(&zdev->kzdev_lock);
+
+ kvm_put_kvm(kvm);
+}
+
+void kvm_s390_pci_init_list(struct kvm *kvm)
+{
+ spin_lock_init(&kvm->arch.kzdev_list_lock);
+ INIT_LIST_HEAD(&kvm->arch.kzdev_list);
+}
+
+void kvm_s390_pci_clear_list(struct kvm *kvm)
+{
+ /*
+ * This list should already be empty, either via vfio device closures
+ * or kvm fd cleanup.
+ */
+ spin_lock(&kvm->arch.kzdev_list_lock);
+ WARN_ON_ONCE(!list_empty(&kvm->arch.kzdev_list));
+ spin_unlock(&kvm->arch.kzdev_list_lock);
+}
+
+static struct zpci_dev *get_zdev_from_kvm_by_fh(struct kvm *kvm, u32 fh)
+{
+ struct zpci_dev *zdev = NULL;
+ struct kvm_zdev *kzdev;
+
+ spin_lock(&kvm->arch.kzdev_list_lock);
+ list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) {
+ if (kzdev->zdev->fh == fh) {
+ zdev = kzdev->zdev;
+ break;
+ }
+ }
+ spin_unlock(&kvm->arch.kzdev_list_lock);
+
+ return zdev;
+}
+
+static int kvm_s390_pci_zpci_reg_aen(struct zpci_dev *zdev,
+ struct kvm_s390_zpci_op *args)
+{
+ struct zpci_fib fib = {};
+ bool hostflag;
+
+ fib.fmt0.aibv = args->u.reg_aen.ibv;
+ fib.fmt0.isc = args->u.reg_aen.isc;
+ fib.fmt0.noi = args->u.reg_aen.noi;
+ if (args->u.reg_aen.sb != 0) {
+ fib.fmt0.aisb = args->u.reg_aen.sb;
+ fib.fmt0.aisbo = args->u.reg_aen.sbo;
+ fib.fmt0.sum = 1;
+ } else {
+ fib.fmt0.aisb = 0;
+ fib.fmt0.aisbo = 0;
+ fib.fmt0.sum = 0;
+ }
+
+ hostflag = !(args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST);
+ return kvm_s390_pci_aif_enable(zdev, &fib, hostflag);
+}
+
+int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args)
+{
+ struct kvm_zdev *kzdev;
+ struct zpci_dev *zdev;
+ int r;
+
+ zdev = get_zdev_from_kvm_by_fh(kvm, args->fh);
+ if (!zdev)
+ return -ENODEV;
+
+ mutex_lock(&zdev->kzdev_lock);
+ mutex_lock(&kvm->lock);
+
+ kzdev = zdev->kzdev;
+ if (!kzdev) {
+ r = -ENODEV;
+ goto out;
+ }
+ if (kzdev->kvm != kvm) {
+ r = -EPERM;
+ goto out;
+ }
+
+ switch (args->op) {
+ case KVM_S390_ZPCIOP_REG_AEN:
+ /* Fail on unknown flags */
+ if (args->u.reg_aen.flags & ~KVM_S390_ZPCIOP_REGAEN_HOST) {
+ r = -EINVAL;
+ break;
+ }
+ r = kvm_s390_pci_zpci_reg_aen(zdev, args);
+ break;
+ case KVM_S390_ZPCIOP_DEREG_AEN:
+ r = kvm_s390_pci_aif_disable(zdev, false);
+ break;
+ default:
+ r = -EINVAL;
+ }
+
+out:
+ mutex_unlock(&kvm->lock);
+ mutex_unlock(&zdev->kzdev_lock);
+ return r;
+}
+
+int kvm_s390_pci_init(void)
+{
+ zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm;
+ zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm;
+
+ if (!kvm_s390_pci_interp_allowed())
+ return 0;
+
+ aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL);
+ if (!aift)
+ return -ENOMEM;
+
+ spin_lock_init(&aift->gait_lock);
+ mutex_init(&aift->aift_lock);
+
+ return 0;
+}
+
+void kvm_s390_pci_exit(void)
+{
+ zpci_kvm_hook.kvm_register = NULL;
+ zpci_kvm_hook.kvm_unregister = NULL;
+
+ if (!kvm_s390_pci_interp_allowed())
+ return;
+
+ mutex_destroy(&aift->aift_lock);
+
+ kfree(aift);
+}
diff --git a/arch/s390/kvm/pci.h b/arch/s390/kvm/pci.h
new file mode 100644
index 000000000000..486d06ef563f
--- /dev/null
+++ b/arch/s390/kvm/pci.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * s390 kvm PCI passthrough support
+ *
+ * Copyright IBM Corp. 2022
+ *
+ * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
+ */
+
+#ifndef __KVM_S390_PCI_H
+#define __KVM_S390_PCI_H
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <asm/airq.h>
+#include <asm/cpu.h>
+
+struct kvm_zdev {
+ struct zpci_dev *zdev;
+ struct kvm *kvm;
+ struct zpci_fib fib;
+ struct list_head entry;
+};
+
+struct zpci_gaite {
+ u32 gisa;
+ u8 gisc;
+ u8 count;
+ u8 reserved;
+ u8 aisbo;
+ u64 aisb;
+};
+
+struct zpci_aift {
+ struct zpci_gaite *gait;
+ struct airq_iv *sbv;
+ struct kvm_zdev **kzdev;
+ spinlock_t gait_lock; /* Protects the gait, used during AEN forward */
+ struct mutex aift_lock; /* Protects the other structures in aift */
+};
+
+extern struct zpci_aift *aift;
+
+static inline struct kvm *kvm_s390_pci_si_to_kvm(struct zpci_aift *aift,
+ unsigned long si)
+{
+ if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM) || !aift->kzdev ||
+ !aift->kzdev[si])
+ return NULL;
+ return aift->kzdev[si]->kvm;
+};
+
+int kvm_s390_pci_aen_init(u8 nisc);
+void kvm_s390_pci_aen_exit(void);
+
+void kvm_s390_pci_init_list(struct kvm *kvm);
+void kvm_s390_pci_clear_list(struct kvm *kvm);
+
+int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args);
+
+int kvm_s390_pci_init(void);
+void kvm_s390_pci_exit(void);
+
+static inline bool kvm_s390_pci_interp_allowed(void)
+{
+ struct cpuid cpu_id;
+
+ get_cpu_id(&cpu_id);
+ switch (cpu_id.machine) {
+ case 0x2817:
+ case 0x2818:
+ case 0x2827:
+ case 0x2828:
+ case 0x2964:
+ case 0x2965:
+ /* No SHM on certain machines */
+ return false;
+ default:
+ return (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM) &&
+ sclp.has_zpci_lsi && sclp.has_aeni && sclp.has_aisi &&
+ sclp.has_aisii);
+ }
+}
+
+#endif /* __KVM_S390_PCI_H */
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 83bb5cf97282..3335fa09b6f1 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -442,7 +442,7 @@ static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
vcpu->stat.instruction_ipte_interlock++;
if (psw_bits(vcpu->arch.sie_block->gpsw).pstate)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
+ wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu->kvm));
kvm_s390_retry_instr(vcpu);
VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
return 0;
@@ -873,10 +873,18 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- if (fc > 3) {
- kvm_s390_set_psw_cc(vcpu, 3);
- return 0;
- }
+ /* Bail out on forbidden function codes */
+ if (fc > 3 && fc != 15)
+ goto out_no_data;
+
+ /*
+ * fc 15 is provided only with
+ * - PTF/CPU topology support through facility 11
+ * - KVM_CAP_S390_USER_STSI
+ */
+ if (fc == 15 && (!test_kvm_facility(vcpu->kvm, 11) ||
+ !vcpu->kvm->arch.user_stsi))
+ goto out_no_data;
if (vcpu->run->s.regs.gprs[0] & 0x0fffff00
|| vcpu->run->s.regs.gprs[1] & 0xffff0000)
@@ -910,6 +918,10 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
goto out_no_data;
handle_stsi_3_2_2(vcpu, (void *) mem);
break;
+ case 15: /* fc 15 is fully handled in userspace */
+ insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2);
+ trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
+ return -EREMOTE;
}
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem,
@@ -1471,7 +1483,7 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
access_key = (operand2 & 0xf0) >> 4;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
- ipte_lock(vcpu);
+ ipte_lock(vcpu->kvm);
ret = guest_translate_address_with_key(vcpu, address, ar, &gpa,
GACC_STORE, access_key);
@@ -1508,7 +1520,7 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
}
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
- ipte_unlock(vcpu);
+ ipte_unlock(vcpu->kvm);
return ret;
}
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index cc7c9599f43e..7cb7799a0acb 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -7,13 +7,25 @@
*/
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <linux/minmax.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <asm/gmap.h>
#include <asm/uv.h>
#include <asm/mman.h>
+#include <linux/pagewalk.h>
+#include <linux/sched/mm.h>
+#include <linux/mmu_notifier.h>
#include "kvm-s390.h"
+static void kvm_s390_clear_pv_state(struct kvm *kvm)
+{
+ kvm->arch.pv.handle = 0;
+ kvm->arch.pv.guest_len = 0;
+ kvm->arch.pv.stor_base = 0;
+ kvm->arch.pv.stor_var = NULL;
+}
+
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
int cc;
@@ -108,7 +120,7 @@ static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
vfree(kvm->arch.pv.stor_var);
free_pages(kvm->arch.pv.stor_base,
get_order(uv_info.guest_base_stor_len));
- memset(&kvm->arch.pv, 0, sizeof(kvm->arch.pv));
+ kvm_s390_clear_pv_state(kvm);
}
static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
@@ -152,21 +164,51 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
int cc;
- /* make all pages accessible before destroying the guest */
- s390_reset_acc(kvm->mm);
-
cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
- atomic_set(&kvm->mm->context.is_protected, 0);
+ /*
+ * if the mm still has a mapping, make all its pages accessible
+ * before destroying the guest
+ */
+ if (mmget_not_zero(kvm->mm)) {
+ s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
+ mmput(kvm->mm);
+ }
+
+ if (!cc) {
+ atomic_dec(&kvm->mm->context.protected_count);
+ kvm_s390_pv_dealloc_vm(kvm);
+ } else {
+ /* Intended memory leak on "impossible" error */
+ s390_replace_asce(kvm->arch.gmap);
+ }
KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
- /* Inteded memory leak on "impossible" error */
- if (!cc)
- kvm_s390_pv_dealloc_vm(kvm);
+
return cc ? -EIO : 0;
}
+static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
+ struct mm_struct *mm)
+{
+ struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
+ u16 dummy;
+
+ /*
+ * No locking is needed since this is the last thread of the last user of this
+ * struct mm.
+ * When the struct kvm gets deinitialized, this notifier is also
+ * unregistered. This means that if this notifier runs, then the
+ * struct kvm is still valid.
+ */
+ kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
+}
+
+static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
+ .release = kvm_s390_pv_mmu_notifier_release,
+};
+
int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
struct uv_cb_cgc uvcb = {
@@ -197,14 +239,22 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
/* Outputs */
kvm->arch.pv.handle = uvcb.guest_handle;
+ atomic_inc(&kvm->mm->context.protected_count);
if (cc) {
- if (uvcb.header.rc & UVC_RC_NEED_DESTROY)
+ if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
- else
+ } else {
+ atomic_dec(&kvm->mm->context.protected_count);
kvm_s390_pv_dealloc_vm(kvm);
+ }
return -EIO;
}
kvm->arch.gmap->guest_handle = uvcb.guest_handle;
+ /* Add the notifier only once. No races because we hold kvm->lock */
+ if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
+ kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
+ mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
+ }
return 0;
}
@@ -224,8 +274,6 @@ int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
*rrc = uvcb.header.rrc;
KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
*rc, *rrc);
- if (!cc)
- atomic_set(&kvm->mm->context.is_protected, 1);
return cc ? -EINVAL : 0;
}
@@ -298,3 +346,200 @@ int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
return -EINVAL;
return 0;
}
+
+int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_dump_cpu uvcb = {
+ .header.cmd = UVC_CMD_DUMP_CPU,
+ .header.len = sizeof(uvcb),
+ .cpu_handle = vcpu->arch.pv.handle,
+ .dump_area_origin = (u64)buff,
+ };
+ int cc;
+
+ cc = uv_call_sched(0, (u64)&uvcb);
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ return cc;
+}
+
+/* Size of the cache for the storage state dump data. 1MB for now */
+#define DUMP_BUFF_LEN HPAGE_SIZE
+
+/**
+ * kvm_s390_pv_dump_stor_state
+ *
+ * @kvm: pointer to the guest's KVM struct
+ * @buff_user: Userspace pointer where we will write the results to
+ * @gaddr: Starting absolute guest address for which the storage state
+ * is requested.
+ * @buff_user_len: Length of the buff_user buffer
+ * @rc: Pointer to where the uvcb return code is stored
+ * @rrc: Pointer to where the uvcb return reason code is stored
+ *
+ * Stores buff_len bytes of tweak component values to buff_user
+ * starting with the 1MB block specified by the absolute guest address
+ * (gaddr). The gaddr pointer will be updated with the last address
+ * for which data was written when returning to userspace. buff_user
+ * might be written to even if an error rc is returned. For instance
+ * if we encounter a fault after writing the first page of data.
+ *
+ * Context: kvm->lock needs to be held
+ *
+ * Return:
+ * 0 on success
+ * -ENOMEM if allocating the cache fails
+ * -EINVAL if gaddr is not aligned to 1MB
+ * -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
+ * -EINVAL if the UV call fails, rc and rrc will be set in this case
+ * -EFAULT if copying the result to buff_user failed
+ */
+int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
+ u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_dump_stor_state uvcb = {
+ .header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
+ .header.len = sizeof(uvcb),
+ .config_handle = kvm->arch.pv.handle,
+ .gaddr = *gaddr,
+ .dump_area_origin = 0,
+ };
+ const u64 increment_len = uv_info.conf_dump_storage_state_len;
+ size_t buff_kvm_size;
+ size_t size_done = 0;
+ u8 *buff_kvm = NULL;
+ int cc, ret;
+
+ ret = -EINVAL;
+ /* UV call processes 1MB guest storage chunks at a time */
+ if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
+ goto out;
+
+ /*
+ * We provide the storage state for 1MB chunks of guest
+ * storage. The buffer will need to be aligned to
+ * conf_dump_storage_state_len so we don't end on a partial
+ * chunk.
+ */
+ if (!buff_user_len ||
+ !IS_ALIGNED(buff_user_len, increment_len))
+ goto out;
+
+ /*
+ * Allocate a buffer from which we will later copy to the user
+ * process. We don't want userspace to dictate our buffer size
+ * so we limit it to DUMP_BUFF_LEN.
+ */
+ ret = -ENOMEM;
+ buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
+ buff_kvm = vzalloc(buff_kvm_size);
+ if (!buff_kvm)
+ goto out;
+
+ ret = 0;
+ uvcb.dump_area_origin = (u64)buff_kvm;
+ /* We will loop until the user buffer is filled or an error occurs */
+ do {
+ /* Get 1MB worth of guest storage state data */
+ cc = uv_call_sched(0, (u64)&uvcb);
+
+ /* All or nothing */
+ if (cc) {
+ ret = -EINVAL;
+ break;
+ }
+
+ size_done += increment_len;
+ uvcb.dump_area_origin += increment_len;
+ buff_user_len -= increment_len;
+ uvcb.gaddr += HPAGE_SIZE;
+
+ /* KVM Buffer full, time to copy to the process */
+ if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
+ if (copy_to_user(buff_user, buff_kvm, size_done)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ buff_user += size_done;
+ size_done = 0;
+ uvcb.dump_area_origin = (u64)buff_kvm;
+ }
+ } while (buff_user_len);
+
+ /* Report back where we ended dumping */
+ *gaddr = uvcb.gaddr;
+
+ /* Let's only log errors; we don't want to spam */
+out:
+ if (ret)
+ KVM_UV_EVENT(kvm, 3,
+ "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
+ uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ vfree(buff_kvm);
+
+ return ret;
+}
+
+/**
+ * kvm_s390_pv_dump_complete
+ *
+ * @kvm: pointer to the guest's KVM struct
+ * @buff_user: Userspace pointer where we will write the results to
+ * @rc: Pointer to where the uvcb return code is stored
+ * @rrc: Pointer to where the uvcb return reason code is stored
+ *
+ * Completes the dumping operation and writes the completion data to
+ * user space.
+ *
+ * Context: kvm->lock needs to be held
+ *
+ * Return:
+ * 0 on success
+ * -ENOMEM if allocating the completion buffer fails
+ * -EINVAL if the UV call fails, rc and rrc will be set in this case
+ * -EFAULT if copying the result to buff_user failed
+ */
+int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
+ u16 *rc, u16 *rrc)
+{
+ struct uv_cb_dump_complete complete = {
+ .header.len = sizeof(complete),
+ .header.cmd = UVC_CMD_DUMP_COMPLETE,
+ .config_handle = kvm_s390_pv_get_handle(kvm),
+ };
+ u64 *compl_data;
+ int ret;
+
+ /* Allocate dump area */
+ compl_data = vzalloc(uv_info.conf_dump_finalize_len);
+ if (!compl_data)
+ return -ENOMEM;
+ complete.dump_area_origin = (u64)compl_data;
+
+ ret = uv_call_sched(0, (u64)&complete);
+ *rc = complete.header.rc;
+ *rrc = complete.header.rrc;
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
+ complete.header.rc, complete.header.rrc);
+
+ if (!ret) {
+ /*
+ * kvm_s390_pv_dealloc_vm() will also (mem)set
+ * this to false on a reboot or other destroy
+ * operation for this vm.
+ */
+ kvm->arch.pv.dumping = false;
+ kvm_s390_vcpu_unblock_all(kvm);
+ ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
+ if (ret)
+ ret = -EFAULT;
+ }
+ vfree(compl_data);
+ /* If the UVC returned an error, translate it to -EINVAL */
+ if (ret > 0)
+ ret = -EINVAL;
+ return ret;
+}
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 8aaee2892ec3..cb747bf6c798 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -480,9 +480,9 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
struct kvm_vcpu *dest_vcpu;
u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
- trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
-
if (order_code == SIGP_EXTERNAL_CALL) {
+ trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
+
dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
BUG_ON(dest_vcpu == NULL);
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index dada78b92691..94138f8f0c1c 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -503,6 +503,14 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
/* Host-protection-interruption introduced with ESOP */
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
+ /*
+ * CPU Topology
+ * This facility only uses the utility field of the SCA and none of
+ * the cpu entries that are problematic with the other interpretation
+ * facilities, so we can pass it through.
+ */
+ if (test_kvm_facility(vcpu->kvm, 11))
+ scb_s->ecb |= scb_o->ecb & ECB_PTF;
/* transactional execution */
if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
/* remap the prefix is tx is toggled on */
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 5d415b3db6d1..580d2e3265cb 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -7,7 +7,6 @@ lib-y += delay.o string.o uaccess.o find.o spinlock.o
obj-y += mem.o xor.o
lib-$(CONFIG_KPROBES) += probes.o
lib-$(CONFIG_UPROBES) += probes.o
-obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o
obj-$(CONFIG_S390_KPROBES_SANITY_TEST) += test_kprobes_s390.o
test_kprobes_s390-objs += test_kprobes_asm.o test_kprobes.o
@@ -22,3 +21,5 @@ obj-$(CONFIG_S390_MODULES_SANITY_TEST) += test_modules.o
obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+
+obj-$(CONFIG_EXPOLINE_EXTERN) += expoline/
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index f7f5adea8940..be14c58cb989 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -13,13 +13,10 @@
void __delay(unsigned long loops)
{
- /*
- * To end the bloody studid and useless discussion about the
- * BogoMips number I took the liberty to define the __delay
- * function in a way that that resulting BogoMips number will
- * yield the megahertz number of the cpu. The important function
- * is udelay and that is done using the tod clock. -- martin.
- */
+ /*
+ * Loop 'loops' times. Callers must not assume a specific
+ * amount of time passes before this function returns.
+ */
asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
}
EXPORT_SYMBOL(__delay);
diff --git a/arch/s390/lib/expoline/Makefile b/arch/s390/lib/expoline/Makefile
new file mode 100644
index 000000000000..854631d9cb03
--- /dev/null
+++ b/arch/s390/lib/expoline/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y += expoline.o
diff --git a/arch/s390/lib/expoline.S b/arch/s390/lib/expoline/expoline.S
index 92ed8409a7a4..92ed8409a7a4 100644
--- a/arch/s390/lib/expoline.S
+++ b/arch/s390/lib/expoline/expoline.S
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index d7b3b193d108..720036fb1924 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -81,8 +81,9 @@ unsigned long _copy_from_user_key(void *to, const void __user *from,
might_fault();
if (!should_fail_usercopy()) {
- instrument_copy_from_user(to, from, n);
+ instrument_copy_from_user_before(to, from, n);
res = raw_copy_from_user_key(to, from, n, key);
+ instrument_copy_from_user_after(to, from, n, res);
}
if (unlikely(res))
memset(to + (n - res), 0, res);
@@ -156,7 +157,7 @@ unsigned long __clear_user(void __user *to, unsigned long size)
asm volatile(
" lr 0,%[spec]\n"
"0: mvcos 0(%1),0(%4),%0\n"
- " jz 4f\n"
+ "6: jz 4f\n"
"1: algr %0,%2\n"
" slgr %1,%2\n"
" j 0b\n"
@@ -166,11 +167,11 @@ unsigned long __clear_user(void __user *to, unsigned long size)
" clgr %0,%3\n" /* copy crosses next page boundary? */
" jnh 5f\n"
"3: mvcos 0(%1),0(%4),%3\n"
- " slgr %0,%3\n"
+ "7: slgr %0,%3\n"
" j 5f\n"
"4: slgr %0,%0\n"
"5:\n"
- EX_TABLE(0b,2b) EX_TABLE(3b,5b)
+ EX_TABLE(0b,2b) EX_TABLE(6b,2b) EX_TABLE(3b,5b) EX_TABLE(7b,5b)
: "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
: "a" (empty_zero_page), [spec] "d" (spec.val)
: "cc", "memory", "0");
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 9f9af5298dd6..9953819d7959 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -8,8 +8,10 @@
#include <linux/kasan.h>
#include <asm/ptdump.h>
#include <asm/kasan.h>
+#include <asm/abs_lowcore.h>
#include <asm/nospec-branch.h>
#include <asm/sections.h>
+#include <asm/maccess.h>
static unsigned long max_addr;
@@ -21,6 +23,8 @@ struct addr_marker {
enum address_markers_idx {
IDENTITY_BEFORE_NR = 0,
IDENTITY_BEFORE_END_NR,
+ AMODE31_START_NR,
+ AMODE31_END_NR,
KERNEL_START_NR,
KERNEL_END_NR,
#ifdef CONFIG_KFENCE
@@ -39,11 +43,17 @@ enum address_markers_idx {
VMALLOC_END_NR,
MODULES_NR,
MODULES_END_NR,
+ ABS_LOWCORE_NR,
+ ABS_LOWCORE_END_NR,
+ MEMCPY_REAL_NR,
+ MEMCPY_REAL_END_NR,
};
static struct addr_marker address_markers[] = {
[IDENTITY_BEFORE_NR] = {0, "Identity Mapping Start"},
[IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"},
+ [AMODE31_START_NR] = {0, "Amode31 Area Start"},
+ [AMODE31_END_NR] = {0, "Amode31 Area End"},
[KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"},
[KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"},
#ifdef CONFIG_KFENCE
@@ -62,6 +72,10 @@ static struct addr_marker address_markers[] = {
[VMALLOC_END_NR] = {0, "vmalloc Area End"},
[MODULES_NR] = {0, "Modules Area Start"},
[MODULES_END_NR] = {0, "Modules Area End"},
+ [ABS_LOWCORE_NR] = {0, "Lowcore Area Start"},
+ [ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"},
+ [MEMCPY_REAL_NR] = {0, "Real Memory Copy Area Start"},
+ [MEMCPY_REAL_END_NR] = {0, "Real Memory Copy Area End"},
{ -1, NULL }
};
@@ -276,8 +290,14 @@ static int pt_dump_init(void)
max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
max_addr = 1UL << (max_addr * 11 + 31);
address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size;
+ address_markers[AMODE31_START_NR].start_address = __samode31;
+ address_markers[AMODE31_END_NR].start_address = __eamode31;
address_markers[MODULES_NR].start_address = MODULES_VADDR;
address_markers[MODULES_END_NR].start_address = MODULES_END;
+ address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore;
+ address_markers[ABS_LOWCORE_END_NR].start_address = __abs_lowcore + ABS_LOWCORE_MAP_SIZE;
+ address_markers[MEMCPY_REAL_NR].start_address = __memcpy_real_area;
+ address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + PAGE_SIZE;
address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size;
address_markers[VMALLOC_NR].start_address = VMALLOC_START;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index e173b6187ad5..9649d9382e0a 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -268,8 +268,7 @@ static noinline void do_sigbus(struct pt_regs *regs)
(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
}
-static noinline void do_fault_error(struct pt_regs *regs, int access,
- vm_fault_t fault)
+static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault)
{
int si_code;
@@ -379,7 +378,9 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
flags = FAULT_FLAG_DEFAULT;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
- if (access == VM_WRITE || is_write)
+ if (is_write)
+ access = VM_WRITE;
+ if (access == VM_WRITE)
flags |= FAULT_FLAG_WRITE;
mmap_read_lock(mm);
@@ -419,8 +420,6 @@ retry:
if (unlikely(!(vma->vm_flags & access)))
goto out_up;
- if (is_vm_hugetlb_page(vma))
- address &= HPAGE_MASK;
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
@@ -433,6 +432,17 @@ retry:
goto out_up;
goto out;
}
+
+ /* The fault is fully completed (including releasing mmap lock) */
+ if (fault & VM_FAULT_COMPLETED) {
+ if (gmap) {
+ mmap_read_lock(mm);
+ goto out_gmap;
+ }
+ fault = 0;
+ goto out;
+ }
+
if (unlikely(fault & VM_FAULT_ERROR))
goto out_up;
@@ -452,6 +462,7 @@ retry:
mmap_read_lock(mm);
goto retry;
}
+out_gmap:
if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
address = __gmap_link(gmap, current->thread.gmap_addr,
address);
@@ -504,7 +515,7 @@ void do_protection_exception(struct pt_regs *regs)
fault = do_exception(regs, access);
}
if (unlikely(fault))
- do_fault_error(regs, access, fault);
+ do_fault_error(regs, fault);
}
NOKPROBE_SYMBOL(do_protection_exception);
@@ -516,7 +527,7 @@ void do_dat_exception(struct pt_regs *regs)
access = VM_ACCESS_FLAGS;
fault = do_exception(regs, access);
if (unlikely(fault))
- do_fault_error(regs, access, fault);
+ do_fault_error(regs, fault);
}
NOKPROBE_SYMBOL(do_dat_exception);
@@ -754,6 +765,7 @@ void do_secure_storage_access(struct pt_regs *regs)
struct vm_area_struct *vma;
struct mm_struct *mm;
struct page *page;
+ struct gmap *gmap;
int rc;
/*
@@ -783,13 +795,24 @@ void do_secure_storage_access(struct pt_regs *regs)
}
switch (get_fault_type(regs)) {
+ case GMAP_FAULT:
+ mm = current->mm;
+ gmap = (struct gmap *)S390_lowcore.gmap;
+ mmap_read_lock(mm);
+ addr = __gmap_translate(gmap, addr);
+ mmap_read_unlock(mm);
+ if (IS_ERR_VALUE(addr)) {
+ do_fault_error(regs, VM_FAULT_BADMAP);
+ break;
+ }
+ fallthrough;
case USER_FAULT:
mm = current->mm;
mmap_read_lock(mm);
vma = find_vma(mm, addr);
if (!vma) {
mmap_read_unlock(mm);
- do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ do_fault_error(regs, VM_FAULT_BADMAP);
break;
}
page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET);
@@ -811,9 +834,8 @@ void do_secure_storage_access(struct pt_regs *regs)
if (rc)
BUG();
break;
- case GMAP_FAULT:
default:
- do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ do_fault_error(regs, VM_FAULT_BADMAP);
WARN_ON_ONCE(1);
}
}
@@ -825,7 +847,7 @@ void do_non_secure_storage_access(struct pt_regs *regs)
struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
if (get_fault_type(regs) != GMAP_FAULT) {
- do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ do_fault_error(regs, VM_FAULT_BADMAP);
WARN_ON_ONCE(1);
return;
}
@@ -837,6 +859,16 @@ NOKPROBE_SYMBOL(do_non_secure_storage_access);
void do_secure_storage_violation(struct pt_regs *regs)
{
+ unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK;
+ struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+
+ /*
+ * If the VM has been rebooted, its address space might still contain
+ * secure pages from the previous boot.
+ * Clear the page so it can be reused.
+ */
+ if (!gmap_destroy_page(gmap, gaddr))
+ return;
/*
* Either KVM messed up the secure guest mapping or the same
* page is mapped into multiple secure guests.
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index b8ae4a4aa2ba..02d15c8dc92e 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2515,8 +2515,9 @@ static const struct mm_walk_ops thp_split_walk_ops = {
static inline void thp_split_mm(struct mm_struct *mm)
{
struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, mm, 0);
- for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+ for_each_vma(vmi, vma) {
vma->vm_flags &= ~VM_HUGEPAGE;
vma->vm_flags |= VM_NOHUGEPAGE;
walk_page_vma(vma, &thp_split_walk_ops, NULL);
@@ -2584,8 +2585,9 @@ int gmap_mark_unmergeable(void)
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
int ret;
+ VMA_ITERATOR(vmi, mm, 0);
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ for_each_vma(vmi, vma) {
ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
MADV_UNMERGEABLE, &vma->vm_flags);
if (ret)
@@ -2697,41 +2699,168 @@ void s390_reset_cmma(struct mm_struct *mm)
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);
+#define GATHER_GET_PAGES 32
+
+struct reset_walk_state {
+ unsigned long next;
+ unsigned long count;
+ unsigned long pfns[GATHER_GET_PAGES];
+};
+
+static int s390_gather_pages(pte_t *ptep, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct reset_walk_state *p = walk->private;
+ pte_t pte = READ_ONCE(*ptep);
+
+ if (pte_present(pte)) {
+ /* we have a reference from the mapping, take an extra one */
+ get_page(phys_to_page(pte_val(pte)));
+ p->pfns[p->count] = phys_to_pfn(pte_val(pte));
+ p->next = next;
+ p->count++;
+ }
+ return p->count >= GATHER_GET_PAGES;
+}
+
+static const struct mm_walk_ops gather_pages_ops = {
+ .pte_entry = s390_gather_pages,
+};
+
/*
- * make inaccessible pages accessible again
+ * Call the Destroy secure page UVC on each page in the given array of PFNs.
+ * Each page needs to have an extra reference, which will be released here.
*/
-static int __s390_reset_acc(pte_t *ptep, unsigned long addr,
- unsigned long next, struct mm_walk *walk)
+void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)
{
- pte_t pte = READ_ONCE(*ptep);
+ unsigned long i;
- /* There is a reference through the mapping */
- if (pte_present(pte))
- WARN_ON_ONCE(uv_destroy_owned_page(pte_val(pte) & PAGE_MASK));
+ for (i = 0; i < count; i++) {
+ /* we always have an extra reference */
+ uv_destroy_owned_page(pfn_to_phys(pfns[i]));
+ /* get rid of the extra reference */
+ put_page(pfn_to_page(pfns[i]));
+ cond_resched();
+ }
+}
+EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns);
+/**
+ * __s390_uv_destroy_range - Call the destroy secure page UVC on each page
+ * in the given range of the given address space.
+ * @mm: the mm to operate on
+ * @start: the start of the range
+ * @end: the end of the range
+ * @interruptible: if not 0, stop when a fatal signal is received
+ *
+ * Walk the given range of the given address space and call the destroy
+ * secure page UVC on each page. Optionally exit early if a fatal signal is
+ * pending.
+ *
+ * Return: 0 on success, -EINTR if the function stopped before completing
+ */
+int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, bool interruptible)
+{
+ struct reset_walk_state state = { .next = start };
+ int r = 1;
+
+ while (r > 0) {
+ state.count = 0;
+ mmap_read_lock(mm);
+ r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state);
+ mmap_read_unlock(mm);
+ cond_resched();
+ s390_uv_destroy_pfns(state.count, state.pfns);
+ if (interruptible && fatal_signal_pending(current))
+ return -EINTR;
+ }
return 0;
}
+EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);
-static const struct mm_walk_ops reset_acc_walk_ops = {
- .pte_entry = __s390_reset_acc,
-};
+/**
+ * s390_unlist_old_asce - Remove the topmost level of page tables from the
+ * list of page tables of the gmap.
+ * @gmap: the gmap whose table is to be removed
+ *
+ * On s390x, KVM keeps a list of all pages containing the page tables of the
+ * gmap (the CRST list). This list is used at tear down time to free all
+ * pages that are now not needed anymore.
+ *
+ * This function removes the topmost page of the tree (the one pointed to by
+ * the ASCE) from the CRST list.
+ *
+ * This means that it will not be freed when the VM is torn down, and needs
+ * to be handled separately by the caller, unless a leak is actually
+ * intended. Notice that this function will only remove the page from the
+ * list; the page will still be used as a top level page table (and ASCE).
+ */
+void s390_unlist_old_asce(struct gmap *gmap)
+{
+ struct page *old;
-#include <linux/sched/mm.h>
-void s390_reset_acc(struct mm_struct *mm)
+ old = virt_to_page(gmap->table);
+ spin_lock(&gmap->guest_table_lock);
+ list_del(&old->lru);
+ /*
+ * Sometimes the topmost page might need to be "removed" multiple
+ * times, for example if the VM is rebooted into secure mode several
+ * times concurrently, or if s390_replace_asce fails after calling
+ * s390_unlist_old_asce and is attempted again later. In that case
+ * the old asce has been removed from the list, and therefore it
+ * will not be freed when the VM terminates, but the ASCE is still
+ * in use and still pointed to.
+ * A subsequent call to replace_asce will follow the pointer and try
+ * to remove the same page from the list again.
+ * Therefore it's necessary that the page of the ASCE has valid
+ * pointers, so list_del can work (and do nothing) without
+ * dereferencing stale or invalid pointers.
+ */
+ INIT_LIST_HEAD(&old->lru);
+ spin_unlock(&gmap->guest_table_lock);
+}
+EXPORT_SYMBOL_GPL(s390_unlist_old_asce);
+
+/**
+ * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
+ * @gmap: the gmap whose ASCE needs to be replaced
+ *
+ * If the allocation of the new top level page table fails, the ASCE is not
+ * replaced.
+ * In any case, the old ASCE is always removed from the gmap CRST list.
+ * Therefore the caller has to make sure to save a pointer to it
+ * beforehand, unless a leak is actually intended.
+ */
+int s390_replace_asce(struct gmap *gmap)
{
- if (!mm_is_protected(mm))
- return;
+ unsigned long asce;
+ struct page *page;
+ void *table;
+
+ s390_unlist_old_asce(gmap);
+
+ page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
+ if (!page)
+ return -ENOMEM;
+ table = page_to_virt(page);
+ memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
+
/*
- * we might be called during
- * reset: we walk the pages and clear
- * close of all kvm file descriptors: we walk the pages and clear
- * exit of process on fd closure: vma already gone, do nothing
+ * The caller has to deal with the old ASCE, but here we make sure
+ * the new one is properly added to the CRST list, so that
+ * it will be freed when the VM is torn down.
*/
- if (!mmget_not_zero(mm))
- return;
- mmap_read_lock(mm);
- walk_page_range(mm, 0, TASK_SIZE, &reset_acc_walk_ops, NULL);
- mmap_read_unlock(mm);
- mmput(mm);
+ spin_lock(&gmap->guest_table_lock);
+ list_add(&page->lru, &gmap->crst_list);
+ spin_unlock(&gmap->guest_table_lock);
+
+ /* Set new table origin while preserving existing ASCE control bits */
+ asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
+ WRITE_ONCE(gmap->asce, asce);
+ WRITE_ONCE(gmap->mm->context.gmap_asce, asce);
+ WRITE_ONCE(gmap->table, table);
+
+ return 0;
}
-EXPORT_SYMBOL_GPL(s390_reset_acc);
+EXPORT_SYMBOL_GPL(s390_replace_asce);
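
A hypothetical caller of the new gmap helpers might look like the sketch below (illustration only; example_reset_protected_guest and the use of TASK_SIZE as the range end are assumptions, not part of this series):

static int example_reset_protected_guest(struct gmap *gmap)
{
	int rc;

	/* destroy every secure page still mapped in the guest's host mm */
	rc = __s390_uv_destroy_range(gmap->mm, 0, TASK_SIZE, true);
	if (rc)
		return rc;	/* -EINTR: a fatal signal was pending */
	/*
	 * Swap in a fresh copy of the top level page table. The old one is
	 * unlisted from the CRST list, so a caller that ever wants to free
	 * it explicitly must have saved a pointer to it beforehand.
	 */
	return s390_replace_asce(gmap);
}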
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 10e51ef9c79a..c299a18273ff 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -237,16 +237,6 @@ int pud_huge(pud_t pud)
return pud_large(pud);
}
-struct page *
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
- pud_t *pud, int flags)
-{
- if (flags & FOLL_GET)
- return NULL;
-
- return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
-}
-
bool __init arch_hugetlb_valid_size(unsigned long size)
{
if (MACHINE_HAS_EDAT1 && size == PMD_SIZE)
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 6a0ac00d5a42..97d66a3e60fb 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -31,14 +31,13 @@
#include <linux/cma.h>
#include <linux/gfp.h>
#include <linux/dma-direct.h>
-#include <linux/platform-feature.h>
#include <asm/processor.h>
#include <linux/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/kfence.h>
#include <asm/ptdump.h>
#include <asm/dma.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
@@ -48,6 +47,7 @@
#include <asm/kasan.h>
#include <asm/dma-mapping.h>
#include <asm/uv.h>
+#include <linux/virtio_anchor.h>
#include <linux/virtio_config.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
@@ -175,7 +175,7 @@ static void pv_init(void)
if (!is_prot_virt_guest())
return;
- platform_set(PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS);
+ virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc);
/* make sure bounce buffers are shared */
swiotlb_init(true, SWIOTLB_FORCE | SWIOTLB_VERBOSE);
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 421efa46946b..1571cdcb0c50 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -12,10 +12,17 @@
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/cpu.h>
+#include <linux/uio.h>
#include <asm/asm-extable.h>
#include <asm/ctl_reg.h>
#include <asm/io.h>
+#include <asm/abs_lowcore.h>
#include <asm/stacktrace.h>
+#include <asm/maccess.h>
+
+unsigned long __bootdata_preserved(__memcpy_real_area);
+static __ro_after_init pte_t *memcpy_real_ptep;
+static DEFINE_MUTEX(memcpy_real_mutex);
static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size)
{
@@ -76,144 +83,72 @@ notrace void *s390_kernel_write(void *dst, const void *src, size_t size)
return dst;
}
-static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count)
-{
- union register_pair _dst, _src;
- int rc = -EFAULT;
-
- _dst.even = (unsigned long) dest;
- _dst.odd = (unsigned long) count;
- _src.even = (unsigned long) src;
- _src.odd = (unsigned long) count;
- asm volatile (
- "0: mvcle %[dst],%[src],0\n"
- "1: jo 0b\n"
- " lhi %[rc],0\n"
- "2:\n"
- EX_TABLE(1b,2b)
- : [rc] "+&d" (rc), [dst] "+&d" (_dst.pair), [src] "+&d" (_src.pair)
- : : "cc", "memory");
- return rc;
-}
-
-static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest,
- unsigned long src,
- unsigned long count)
-{
- int irqs_disabled, rc;
- unsigned long flags;
-
- if (!count)
- return 0;
- flags = arch_local_irq_save();
- irqs_disabled = arch_irqs_disabled_flags(flags);
- if (!irqs_disabled)
- trace_hardirqs_off();
- __arch_local_irq_stnsm(0xf8); // disable DAT
- rc = __memcpy_real((void *) dest, (void *) src, (size_t) count);
- if (flags & PSW_MASK_DAT)
- __arch_local_irq_stosm(0x04); // enable DAT
- if (!irqs_disabled)
- trace_hardirqs_on();
- __arch_local_irq_ssm(flags);
- return rc;
-}
-
-/*
- * Copy memory in real mode (kernel to kernel)
- */
-int memcpy_real(void *dest, unsigned long src, size_t count)
+void __init memcpy_real_init(void)
{
- unsigned long _dest = (unsigned long)dest;
- unsigned long _src = (unsigned long)src;
- unsigned long _count = (unsigned long)count;
- int rc;
-
- if (S390_lowcore.nodat_stack != 0) {
- preempt_disable();
- rc = call_on_stack(3, S390_lowcore.nodat_stack,
- unsigned long, _memcpy_real,
- unsigned long, _dest,
- unsigned long, _src,
- unsigned long, _count);
- preempt_enable();
- return rc;
- }
- /*
- * This is a really early memcpy_real call, the stacks are
- * not set up yet. Just call _memcpy_real on the early boot
- * stack
- */
- return _memcpy_real(_dest, _src, _count);
+ memcpy_real_ptep = vmem_get_alloc_pte(__memcpy_real_area, true);
+ if (!memcpy_real_ptep)
+ panic("Couldn't setup memcpy real area");
}
-/*
- * Copy memory in absolute mode (kernel to kernel)
- */
-void memcpy_absolute(void *dest, void *src, size_t count)
+size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count)
{
- unsigned long cr0, flags, prefix;
-
- flags = arch_local_irq_save();
- __ctl_store(cr0, 0, 0);
- __ctl_clear_bit(0, 28); /* disable lowcore protection */
- prefix = store_prefix();
- if (prefix) {
- local_mcck_disable();
- set_prefix(0);
- memcpy(dest, src, count);
- set_prefix(prefix);
- local_mcck_enable();
- } else {
- memcpy(dest, src, count);
+ size_t len, copied, res = 0;
+ unsigned long phys, offset;
+ void *chunk;
+ pte_t pte;
+
+ while (count) {
+ phys = src & PAGE_MASK;
+ offset = src & ~PAGE_MASK;
+ chunk = (void *)(__memcpy_real_area + offset);
+ len = min(count, PAGE_SIZE - offset);
+ pte = mk_pte_phys(phys, PAGE_KERNEL_RO);
+
+ mutex_lock(&memcpy_real_mutex);
+ if (pte_val(pte) != pte_val(*memcpy_real_ptep)) {
+ __ptep_ipte(__memcpy_real_area, memcpy_real_ptep, 0, 0, IPTE_GLOBAL);
+ set_pte(memcpy_real_ptep, pte);
+ }
+ copied = copy_to_iter(chunk, len, iter);
+ mutex_unlock(&memcpy_real_mutex);
+
+ count -= copied;
+ src += copied;
+ res += copied;
+ if (copied < len)
+ break;
}
- __ctl_load(cr0, 0, 0);
- arch_local_irq_restore(flags);
+ return res;
}
-/*
- * Copy memory from kernel (real) to user (virtual)
- */
-int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count)
+int memcpy_real(void *dest, unsigned long src, size_t count)
{
- int offs = 0, size, rc;
- char *buf;
-
- buf = (char *) __get_free_page(GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
- rc = -EFAULT;
- while (offs < count) {
- size = min(PAGE_SIZE, count - offs);
- if (memcpy_real(buf, src + offs, size))
- goto out;
- if (copy_to_user(dest + offs, buf, size))
- goto out;
- offs += size;
- }
- rc = 0;
-out:
- free_page((unsigned long) buf);
- return rc;
+ struct iov_iter iter;
+ struct kvec kvec;
+
+ kvec.iov_base = dest;
+ kvec.iov_len = count;
+ iov_iter_kvec(&iter, WRITE, &kvec, 1, count);
+ if (memcpy_real_iter(&iter, src, count) < count)
+ return -EFAULT;
+ return 0;
}
/*
- * Check if physical address is within prefix or zero page
+ * Find CPU that owns swapped prefix page
*/
-static int is_swapped(phys_addr_t addr)
+static int get_swapped_owner(phys_addr_t addr)
{
phys_addr_t lc;
int cpu;
- if (addr < sizeof(struct lowcore))
- return 1;
for_each_online_cpu(cpu) {
lc = virt_to_phys(lowcore_ptr[cpu]);
if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc)
continue;
- return 1;
+ return cpu;
}
- return 0;
+ return -1;
}
/*
@@ -226,17 +161,35 @@ void *xlate_dev_mem_ptr(phys_addr_t addr)
{
void *ptr = phys_to_virt(addr);
void *bounce = ptr;
+ struct lowcore *abs_lc;
+ unsigned long flags;
unsigned long size;
+ int this_cpu, cpu;
cpus_read_lock();
- preempt_disable();
- if (is_swapped(addr)) {
- size = PAGE_SIZE - (addr & ~PAGE_MASK);
- bounce = (void *) __get_free_page(GFP_ATOMIC);
- if (bounce)
- memcpy_absolute(bounce, ptr, size);
+ this_cpu = get_cpu();
+ if (addr >= sizeof(struct lowcore)) {
+ cpu = get_swapped_owner(addr);
+ if (cpu < 0)
+ goto out;
}
- preempt_enable();
+ bounce = (void *)__get_free_page(GFP_ATOMIC);
+ if (!bounce)
+ goto out;
+ size = PAGE_SIZE - (addr & ~PAGE_MASK);
+ if (addr < sizeof(struct lowcore)) {
+ abs_lc = get_abs_lowcore(&flags);
+ ptr = (void *)abs_lc + addr;
+ memcpy(bounce, ptr, size);
+ put_abs_lowcore(abs_lc, flags);
+ } else if (cpu == this_cpu) {
+ ptr = (void *)(addr - virt_to_phys(lowcore_ptr[cpu]));
+ memcpy(bounce, ptr, size);
+ } else {
+ memcpy(bounce, ptr, size);
+ }
+out:
+ put_cpu();
cpus_read_unlock();
return bounce;
}
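
A hedged sketch of how the new iov_iter based interface could stand in for the removed copy_to_user_real() (assumption, not part of the patch; the function name is made up): the user buffer is described by an iovec-backed iterator, so no intermediate kernel bounce page is needed.

static int example_copy_real_to_user(void __user *dest, unsigned long src, size_t count)
{
	struct iovec iov = {
		.iov_base = dest,
		.iov_len = count,
	};
	struct iov_iter iter;

	iov_iter_init(&iter, WRITE, &iov, 1, count);
	if (memcpy_real_iter(&iter, src, count) < count)
		return -EFAULT;
	return 0;
}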
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index d545f5c39f7e..3327c47bc181 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -37,7 +37,7 @@ static inline int mmap_is_legacy(struct rlimit *rlim_stack)
unsigned long arch_mmap_rnd(void)
{
- return (get_random_int() & MMAP_RND_MASK) << PAGE_SHIFT;
+ return (get_random_u32() & MMAP_RND_MASK) << PAGE_SHIFT;
}
static unsigned long mmap_base_legacy(unsigned long rnd)
@@ -188,3 +188,23 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}
+
+static const pgprot_t protection_map[16] = {
+ [VM_NONE] = PAGE_NONE,
+ [VM_READ] = PAGE_RO,
+ [VM_WRITE] = PAGE_RO,
+ [VM_WRITE | VM_READ] = PAGE_RO,
+ [VM_EXEC] = PAGE_RX,
+ [VM_EXEC | VM_READ] = PAGE_RX,
+ [VM_EXEC | VM_WRITE] = PAGE_RX,
+ [VM_EXEC | VM_WRITE | VM_READ] = PAGE_RX,
+ [VM_SHARED] = PAGE_NONE,
+ [VM_SHARED | VM_READ] = PAGE_RO,
+ [VM_SHARED | VM_WRITE] = PAGE_RW,
+ [VM_SHARED | VM_WRITE | VM_READ] = PAGE_RW,
+ [VM_SHARED | VM_EXEC] = PAGE_RX,
+ [VM_SHARED | VM_EXEC | VM_READ] = PAGE_RX,
+ [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_RWX,
+ [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX
+};
+DECLARE_VM_GET_PAGE_PROT
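
For reference, DECLARE_VM_GET_PAGE_PROT is expected to expand to roughly the helper below, indexing the protection_map[] table above by the low VM_* flag bits (sketch of the generic macro, not s390-specific code):

pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	return protection_map[vm_flags & (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)];
}
EXPORT_SYMBOL(vm_get_page_prot);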
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index c2583f921ca8..ee1a97078527 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -240,7 +240,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
} else if (pmd_none(*pmd)) {
if (IS_ALIGNED(addr, PMD_SIZE) &&
IS_ALIGNED(next, PMD_SIZE) &&
- MACHINE_HAS_EDAT1 && addr && direct &&
+ MACHINE_HAS_EDAT1 && direct &&
!debug_pagealloc_enabled()) {
set_pmd(pmd, __pmd(__pa(addr) | prot));
pages++;
@@ -336,7 +336,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
} else if (pud_none(*pud)) {
if (IS_ALIGNED(addr, PUD_SIZE) &&
IS_ALIGNED(next, PUD_SIZE) &&
- MACHINE_HAS_EDAT2 && addr && direct &&
+ MACHINE_HAS_EDAT2 && direct &&
!debug_pagealloc_enabled()) {
set_pud(pud, __pud(__pa(addr) | prot));
pages++;
@@ -561,6 +561,103 @@ int vmem_add_mapping(unsigned long start, unsigned long size)
}
/*
+ * Allocate new or return existing page-table entry, but do not map it
+ * to any physical address. If missing, allocate segment- and region-
+ * table entries along the way. Meeting a large segment- or region-table
+ * entry while traversing is an error, since the function is expected to
+ * be called against virtual regions reserved for 4KB mappings only.
+ */
+pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc)
+{
+ pte_t *ptep = NULL;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = pgd_offset_k(addr);
+ if (pgd_none(*pgd)) {
+ if (!alloc)
+ goto out;
+ p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
+ if (!p4d)
+ goto out;
+ pgd_populate(&init_mm, pgd, p4d);
+ }
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d)) {
+ if (!alloc)
+ goto out;
+ pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
+ if (!pud)
+ goto out;
+ p4d_populate(&init_mm, p4d, pud);
+ }
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud)) {
+ if (!alloc)
+ goto out;
+ pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ if (!pmd)
+ goto out;
+ pud_populate(&init_mm, pud, pmd);
+ } else if (WARN_ON_ONCE(pud_large(*pud))) {
+ goto out;
+ }
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd)) {
+ if (!alloc)
+ goto out;
+ pte = vmem_pte_alloc();
+ if (!pte)
+ goto out;
+ pmd_populate(&init_mm, pmd, pte);
+ } else if (WARN_ON_ONCE(pmd_large(*pmd))) {
+ goto out;
+ }
+ ptep = pte_offset_kernel(pmd, addr);
+out:
+ return ptep;
+}
+
+int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc)
+{
+ pte_t *ptep, pte;
+
+ if (!IS_ALIGNED(addr, PAGE_SIZE))
+ return -EINVAL;
+ ptep = vmem_get_alloc_pte(addr, alloc);
+ if (!ptep)
+ return -ENOMEM;
+ __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
+ pte = mk_pte_phys(phys, prot);
+ set_pte(ptep, pte);
+ return 0;
+}
+
+int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot)
+{
+ int rc;
+
+ mutex_lock(&vmem_mutex);
+ rc = __vmem_map_4k_page(addr, phys, prot, true);
+ mutex_unlock(&vmem_mutex);
+ return rc;
+}
+
+void vmem_unmap_4k_page(unsigned long addr)
+{
+ pte_t *ptep;
+
+ mutex_lock(&vmem_mutex);
+ ptep = virt_to_kpte(addr);
+ __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
+ pte_clear(&init_mm, addr, ptep);
+ mutex_unlock(&vmem_mutex);
+}
+
+/*
* map whole physical memory to virtual memory (identity mapping)
* we reserve enough space in the vmalloc area for vmemmap to hotplug
* additional memory segments.
@@ -584,6 +681,9 @@ void __init vmem_map_init(void)
__set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
+ /* lowcore requires 4k mapping for real addresses / prefixing */
+ set_memory_4k(0, LC_PAGES);
+
/* lowcore must be executable for LPSWE */
if (!static_key_enabled(&cpu_has_bear))
set_memory_x(0, 1);
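
A hypothetical user of the new 4K mapping helpers could map one physical page read-only into a reserved kernel virtual address and tear it down again as sketched below (example_probe_page and its parameters are made up for illustration):

static int example_probe_page(unsigned long virt, unsigned long phys)
{
	int rc;

	rc = vmem_map_4k_page(virt, phys, PAGE_KERNEL_RO);
	if (rc)
		return rc;
	/* read through *(unsigned long *)virt here */
	vmem_unmap_4k_page(virt);
	return 0;
}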
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index bf557a1b789c..5ae31ca9dd44 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -5,5 +5,5 @@
obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
- pci_bus.o
+ pci_bus.o pci_kvm_hook.o
obj-$(CONFIG_PCI_IOV) += pci_iov.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index bc980fd313d5..73cdc5539384 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -61,6 +61,12 @@ DEFINE_STATIC_KEY_FALSE(have_mio);
static struct kmem_cache *zdev_fmb_cache;
+/* AEN structures that must be preserved over KVM module re-insertion */
+union zpci_sic_iib *zpci_aipb;
+EXPORT_SYMBOL_GPL(zpci_aipb);
+struct airq_iv *zpci_aif_sbv;
+EXPORT_SYMBOL_GPL(zpci_aif_sbv);
+
struct zpci_dev *get_zdev_by_fid(u32 fid)
{
struct zpci_dev *tmp, *zdev = NULL;
@@ -120,11 +126,13 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
fib.pba = base;
fib.pal = limit;
fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
+ fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, &status);
if (cc)
zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
return cc;
}
+EXPORT_SYMBOL_GPL(zpci_register_ioat);
/* Modify PCI: Unregister I/O address translation parameters */
int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
@@ -133,6 +141,8 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
struct zpci_fib fib = {0};
u8 cc, status;
+ fib.gd = zdev->gisa;
+
cc = zpci_mod_fc(req, &fib, &status);
if (cc)
zpci_dbg(3, "unreg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
@@ -160,6 +170,7 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
atomic64_set(&zdev->unmapped_pages, 0);
fib.fmb_addr = virt_to_phys(zdev->fmb);
+ fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, &status);
if (cc) {
kmem_cache_free(zdev_fmb_cache, zdev->fmb);
@@ -178,6 +189,8 @@ int zpci_fmb_disable_device(struct zpci_dev *zdev)
if (!zdev->fmb)
return -EINVAL;
+ fib.gd = zdev->gisa;
+
/* Function measurement is disabled if fmb address is zero */
cc = zpci_mod_fc(req, &fib, &status);
if (cc == 3) /* Function already gone. */
@@ -700,6 +713,7 @@ int zpci_enable_device(struct zpci_dev *zdev)
zpci_update_fh(zdev, fh);
return rc;
}
+EXPORT_SYMBOL_GPL(zpci_enable_device);
int zpci_disable_device(struct zpci_dev *zdev)
{
@@ -723,6 +737,7 @@ int zpci_disable_device(struct zpci_dev *zdev)
}
return rc;
}
+EXPORT_SYMBOL_GPL(zpci_disable_device);
/**
* zpci_hot_reset_device - perform a reset of the given zPCI function
@@ -816,6 +831,7 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
kref_init(&zdev->kref);
mutex_init(&zdev->lock);
+ mutex_init(&zdev->kzdev_lock);
rc = zpci_init_iommu(zdev);
if (rc)
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 5d77acbd1c87..6a8da1b742ae 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -145,9 +145,6 @@ int zpci_bus_scan_bus(struct zpci_bus *zbus)
struct zpci_dev *zdev;
int devfn, rc, ret = 0;
- if (!zbus->function[0])
- return 0;
-
for (devfn = 0; devfn < ZPCI_FUNCTIONS_PER_BUS; devfn++) {
zdev = zbus->function[devfn];
if (zdev && zdev->state == ZPCI_FN_STATE_CONFIGURED) {
@@ -184,26 +181,26 @@ void zpci_bus_scan_busses(void)
/* zpci_bus_create_pci_bus - Create the PCI bus associated with this zbus
* @zbus: the zbus holding the zdevices
- * @f0: function 0 of the bus
+ * @fr: PCI root function that will determine the bus's domain and bus speed
* @ops: the pci operations
*
- * Function zero is taken as a parameter as this is used to determine the
- * domain, multifunction property and maximum bus speed of the entire bus.
+ * The PCI function @fr determines the domain (its UID), multifunction property
+ * and maximum bus speed of the entire bus.
*
* Return: 0 on success, an error code otherwise
*/
-static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *f0, struct pci_ops *ops)
+static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *fr, struct pci_ops *ops)
{
struct pci_bus *bus;
int domain;
- domain = zpci_alloc_domain((u16)f0->uid);
+ domain = zpci_alloc_domain((u16)fr->uid);
if (domain < 0)
return domain;
zbus->domain_nr = domain;
- zbus->multifunction = f0->rid_available;
- zbus->max_bus_speed = f0->max_bus_speed;
+ zbus->multifunction = fr->rid_available;
+ zbus->max_bus_speed = fr->max_bus_speed;
/*
* Note that the zbus->resources are taken over and zbus->resources
@@ -303,47 +300,6 @@ void pcibios_bus_add_device(struct pci_dev *pdev)
}
}
-/* zpci_bus_create_hotplug_slots - Add hotplug slot(s) for device added to bus
- * @zdev: the zPCI device that was newly added
- *
- * Add the hotplug slot(s) for the newly added PCI function. Normally this is
- * simply the slot for the function itself. If however we are adding the
- * function 0 on a zbus, it might be that we already registered functions on
- * that zbus but could not create their hotplug slots yet so add those now too.
- *
- * Return: 0 on success, an error code otherwise
- */
-static int zpci_bus_create_hotplug_slots(struct zpci_dev *zdev)
-{
- struct zpci_bus *zbus = zdev->zbus;
- int devfn, rc = 0;
-
- rc = zpci_init_slot(zdev);
- if (rc)
- return rc;
- zdev->has_hp_slot = 1;
-
- if (zdev->devfn == 0 && zbus->multifunction) {
- /* Now that function 0 is there we can finally create the
- * hotplug slots for those functions with devfn != 0 that have
- * been parked in zbus->function[] waiting for us to be able to
- * create the PCI bus.
- */
- for (devfn = 1; devfn < ZPCI_FUNCTIONS_PER_BUS; devfn++) {
- zdev = zbus->function[devfn];
- if (zdev && !zdev->has_hp_slot) {
- rc = zpci_init_slot(zdev);
- if (rc)
- return rc;
- zdev->has_hp_slot = 1;
- }
- }
-
- }
-
- return rc;
-}
-
static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
{
int rc = -EINVAL;
@@ -352,21 +308,19 @@ static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
pr_err("devfn %04x is already assigned\n", zdev->devfn);
return rc;
}
+
zdev->zbus = zbus;
zbus->function[zdev->devfn] = zdev;
zpci_nb_devices++;
- if (zbus->bus) {
- if (zbus->multifunction && !zdev->rid_available) {
- WARN_ONCE(1, "rid_available not set for multifunction\n");
- goto error;
- }
-
- zpci_bus_create_hotplug_slots(zdev);
- } else {
- /* Hotplug slot will be created once function 0 appears */
- zbus->multifunction = 1;
+ if (zbus->multifunction && !zdev->rid_available) {
+ WARN_ONCE(1, "rid_available not set for multifunction\n");
+ goto error;
}
+ rc = zpci_init_slot(zdev);
+ if (rc)
+ goto error;
+ zdev->has_hp_slot = 1;
return 0;
@@ -400,7 +354,11 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
return -ENOMEM;
}
- if (zdev->devfn == 0) {
+ if (!zbus->bus) {
+ /* The UID of the first PCI function registered with a zpci_bus
+ * is used as the domain number for that bus. Currently there
+ * is exactly one zpci_bus per domain.
+ */
rc = zpci_bus_create_pci_bus(zbus, zdev, ops);
if (rc)
goto error;
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 375e0a5120bc..ee367798e388 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -106,6 +106,8 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
zdev->max_msi = response->noi;
zdev->fmb_update = response->mui;
zdev->version = response->version;
+ zdev->maxstbl = response->maxstbl;
+ zdev->dtsm = response->dtsm;
switch (response->version) {
case 1:
@@ -229,12 +231,16 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as, u8 comma
{
struct clp_req_rsp_set_pci *rrb;
int rc, retries = 100;
+ u32 gisa = 0;
*fh = 0;
rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
+ if (command != CLP_SET_DISABLE_PCI_FN)
+ gisa = zdev->gisa;
+
do {
memset(rrb, 0, sizeof(*rrb));
rrb->request.hdr.len = sizeof(rrb->request);
@@ -243,6 +249,7 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as, u8 comma
rrb->request.fh = zdev->fh;
rrb->request.oc = command;
rrb->request.ndas = nr_dma_as;
+ rrb->request.gisa = gisa;
rc = clp_req(rrb, CLP_LPS_PCI);
if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) {
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index f46833a25526..227cf0a62800 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -666,7 +666,7 @@ static int __init dma_alloc_cpu_table_caches(void)
int __init zpci_dma_init(void)
{
- s390_iommu_aperture = (u64)high_memory;
+ s390_iommu_aperture = (u64)virt_to_phys(high_memory);
if (!s390_iommu_aperture_factor)
s390_iommu_aperture = ULONG_MAX;
else
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 1a822b7799f8..56480be48244 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -92,6 +92,7 @@ u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status)
return cc;
}
+EXPORT_SYMBOL_GPL(zpci_mod_fc);
/* Refresh PCI Translations */
static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
@@ -138,7 +139,7 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
}
/* Set Interruption Controls */
-int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
+int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
{
if (!test_facility(72))
return -EIO;
@@ -149,6 +150,7 @@ int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
return 0;
}
+EXPORT_SYMBOL_GPL(zpci_set_irq_ctrl);
/* PCI Load */
static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status)
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 500cd2dbdf53..a2b42a63a53b 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -11,16 +11,10 @@
#include <asm/isc.h>
#include <asm/airq.h>
+#include <asm/tpi.h>
static enum {FLOATING, DIRECTED} irq_delivery;
-#define SIC_IRQ_MODE_ALL 0
-#define SIC_IRQ_MODE_SINGLE 1
-#define SIC_IRQ_MODE_DIRECT 4
-#define SIC_IRQ_MODE_D_ALL 16
-#define SIC_IRQ_MODE_D_SINGLE 17
-#define SIC_IRQ_MODE_SET_CPU 18
-
/*
* summary bit vector
* FLOATING - summary bit per function
@@ -49,6 +43,7 @@ static int zpci_set_airq(struct zpci_dev *zdev)
fib.fmt0.aibvo = 0; /* each zdev has its own interrupt vector */
fib.fmt0.aisb = virt_to_phys(zpci_sbv->vector) + (zdev->aisb / 64) * 8;
fib.fmt0.aisbo = zdev->aisb & 63;
+ fib.gd = zdev->gisa;
return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
}
@@ -60,6 +55,8 @@ static int zpci_clear_airq(struct zpci_dev *zdev)
struct zpci_fib fib = {0};
u8 cc, status;
+ fib.gd = zdev->gisa;
+
cc = zpci_mod_fc(req, &fib, &status);
if (cc == 3 || (cc == 1 && status == 24))
/* Function already gone or IRQs already deregistered. */
@@ -78,6 +75,7 @@ static int zpci_set_directed_irq(struct zpci_dev *zdev)
fib.fmt = 1;
fib.fmt1.noi = zdev->msi_nr_irqs;
fib.fmt1.dibvo = zdev->msi_first_bit;
+ fib.gd = zdev->gisa;
return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
}
@@ -90,6 +88,7 @@ static int zpci_clear_directed_irq(struct zpci_dev *zdev)
u8 cc, status;
fib.fmt = 1;
+ fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, &status);
if (cc == 3 || (cc == 1 && status == 24))
/* Function already gone or IRQs already deregistered. */
@@ -153,6 +152,7 @@ static struct irq_chip zpci_irq_chip = {
static void zpci_handle_cpu_local_irq(bool rescan)
{
struct airq_iv *dibv = zpci_ibv[smp_processor_id()];
+ union zpci_sic_iib iib = {{0}};
unsigned long bit;
int irqs_on = 0;
@@ -164,7 +164,7 @@ static void zpci_handle_cpu_local_irq(bool rescan)
/* End of second scan with interrupts on. */
break;
/* First scan complete, reenable interrupts. */
- if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC))
+ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &iib))
break;
bit = 0;
continue;
@@ -192,6 +192,7 @@ static void zpci_handle_remote_irq(void *data)
static void zpci_handle_fallback_irq(void)
{
struct cpu_irq_data *cpu_data;
+ union zpci_sic_iib iib = {{0}};
unsigned long cpu;
int irqs_on = 0;
@@ -202,7 +203,7 @@ static void zpci_handle_fallback_irq(void)
/* End of second scan with interrupts on. */
break;
/* First scan complete, reenable interrupts. */
- if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC))
+ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
break;
cpu = 0;
continue;
@@ -216,8 +217,11 @@ static void zpci_handle_fallback_irq(void)
}
}
-static void zpci_directed_irq_handler(struct airq_struct *airq, bool floating)
+static void zpci_directed_irq_handler(struct airq_struct *airq,
+ struct tpi_info *tpi_info)
{
+ bool floating = !tpi_info->directed_irq;
+
if (floating) {
inc_irq_stat(IRQIO_PCF);
zpci_handle_fallback_irq();
@@ -227,8 +231,10 @@ static void zpci_directed_irq_handler(struct airq_struct *airq, bool floating)
}
}
-static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating)
+static void zpci_floating_irq_handler(struct airq_struct *airq,
+ struct tpi_info *tpi_info)
{
+ union zpci_sic_iib iib = {{0}};
unsigned long si, ai;
struct airq_iv *aibv;
int irqs_on = 0;
@@ -242,7 +248,7 @@ static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating)
/* End of second scan with interrupts on. */
break;
/* First scan complete, reenable interrupts. */
- if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC))
+ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
break;
si = 0;
continue;
@@ -291,7 +297,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
zdev->aisb = bit;
/* Create adapter interrupt vector */
- zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
+ zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL);
if (!zdev->aibv)
return -ENOMEM;
@@ -402,11 +408,12 @@ static struct airq_struct zpci_airq = {
static void __init cpu_enable_directed_irq(void *unused)
{
union zpci_sic_iib iib = {{0}};
+ union zpci_sic_iib ziib = {{0}};
iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector;
- __zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
- zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC);
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &ziib);
}
static int __init zpci_directed_irq_init(void)
@@ -414,14 +421,14 @@ static int __init zpci_directed_irq_init(void)
union zpci_sic_iib iib = {{0}};
unsigned int cpu;
- zpci_sbv = airq_iv_create(num_possible_cpus(), 0);
+ zpci_sbv = airq_iv_create(num_possible_cpus(), 0, NULL);
if (!zpci_sbv)
return -ENOMEM;
iib.diib.isc = PCI_ISC;
iib.diib.nr_cpus = num_possible_cpus();
iib.diib.disb_addr = virt_to_phys(zpci_sbv->vector);
- __zpci_set_irq_ctrl(SIC_IRQ_MODE_DIRECT, 0, &iib);
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_DIRECT, 0, &iib);
zpci_ibv = kcalloc(num_possible_cpus(), sizeof(*zpci_ibv),
GFP_KERNEL);
@@ -436,7 +443,7 @@ static int __init zpci_directed_irq_init(void)
zpci_ibv[cpu] = airq_iv_create(cache_line_size() * BITS_PER_BYTE,
AIRQ_IV_DATA |
AIRQ_IV_CACHELINE |
- (!cpu ? AIRQ_IV_ALLOC : 0));
+ (!cpu ? AIRQ_IV_ALLOC : 0), NULL);
if (!zpci_ibv[cpu])
return -ENOMEM;
}
@@ -453,7 +460,7 @@ static int __init zpci_floating_irq_init(void)
if (!zpci_ibv)
return -ENOMEM;
- zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
+ zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL);
if (!zpci_sbv)
goto out_free;
@@ -466,6 +473,7 @@ out_free:
int __init zpci_irq_init(void)
{
+ union zpci_sic_iib iib = {{0}};
int rc;
irq_delivery = sclp.has_dirq ? DIRECTED : FLOATING;
@@ -497,7 +505,7 @@ int __init zpci_irq_init(void)
* Enable floating IRQs (with suppression after one IRQ). When using
* directed IRQs this enables the fallback path.
*/
- zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC);
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib);
return 0;
out_airq:
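
With the changed adapter interrupt handler signature, a handler now receives the TPI information word and derives floating vs. directed delivery itself; a minimal sketch (illustration only, mirroring the logic of the handlers above):

static void example_airq_handler(struct airq_struct *airq, struct tpi_info *tpi_info)
{
	if (tpi_info->directed_irq)
		zpci_handle_cpu_local_irq(true);	/* directed, per-CPU vector */
	else
		zpci_handle_fallback_irq();		/* floating summary path */
}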
diff --git a/arch/s390/pci/pci_kvm_hook.c b/arch/s390/pci/pci_kvm_hook.c
new file mode 100644
index 000000000000..ff34baf50a3e
--- /dev/null
+++ b/arch/s390/pci/pci_kvm_hook.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VFIO ZPCI devices support
+ *
+ * Copyright (C) IBM Corp. 2022. All rights reserved.
+ * Author(s): Pierre Morel <pmorel@linux.ibm.com>
+ */
+#include <linux/kvm_host.h>
+
+struct zpci_kvm_hook zpci_kvm_hook;
+EXPORT_SYMBOL_GPL(zpci_kvm_hook);
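
For context, the hook structure exported here is declared elsewhere in the same series (asm/kvm_host.h); its shape is believed to be roughly the following, to be filled in by the vfio-pci zdev code so KVM is notified about zPCI passthrough devices (reproduced from memory, treat as an assumption):

struct zpci_kvm_hook {
	/* called when a zPCI device is registered with a KVM guest */
	int (*kvm_register)(void *opaque, struct kvm *kvm);
	/* called when the device is unregistered from the guest */
	void (*kvm_unregister)(void *opaque);
};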
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index 080c88620723..588089332931 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -64,7 +64,7 @@ static inline int __pcistg_mio_inuser(
asm volatile (
" sacf 256\n"
"0: llgc %[tmp],0(%[src])\n"
- " sllg %[val],%[val],8\n"
+ "4: sllg %[val],%[val],8\n"
" aghi %[src],1\n"
" ogr %[val],%[tmp]\n"
" brctg %[cnt],0b\n"
@@ -72,7 +72,7 @@ static inline int __pcistg_mio_inuser(
"2: ipm %[cc]\n"
" srl %[cc],28\n"
"3: sacf 768\n"
- EX_TABLE(0b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b)
+ EX_TABLE(0b, 3b) EX_TABLE(4b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b)
:
[src] "+a" (src), [cnt] "+d" (cnt),
[val] "+d" (val), [tmp] "=d" (tmp),
@@ -215,10 +215,10 @@ static inline int __pcilg_mio_inuser(
"2: ahi %[shift],-8\n"
" srlg %[tmp],%[val],0(%[shift])\n"
"3: stc %[tmp],0(%[dst])\n"
- " aghi %[dst],1\n"
+ "5: aghi %[dst],1\n"
" brctg %[cnt],2b\n"
"4: sacf 768\n"
- EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b)
+ EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) EX_TABLE(5b, 4b)
:
[ioaddr_len] "+&d" (ioaddr_len.pair),
[cc] "+d" (cc), [val] "=d" (val),
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index 360ada80d20c..d237bc6841cb 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -48,7 +48,6 @@ OBJCOPYFLAGS_purgatory.ro += --remove-section='.note.*'
$(obj)/purgatory.ro: $(obj)/purgatory $(obj)/purgatory.chk FORCE
$(call if_changed,objcopy)
-$(obj)/kexec-purgatory.o: $(obj)/kexec-purgatory.S $(obj)/purgatory.ro FORCE
- $(call if_changed_rule,as_o_S)
+$(obj)/kexec-purgatory.o: $(obj)/purgatory.ro
-obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += kexec-purgatory.o
+obj-y += kexec-purgatory.o
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 530dd941d140..cb0aff5c0187 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -111,6 +111,7 @@ static struct facility_def facility_defs[] = {
193, /* bear enhancement facility */
194, /* rdp enhancement facility */
196, /* processor activity instrumentation facility */
+ 197, /* processor activity instrumentation extension 1 */
-1 /* END */
}
},
