diff options
Diffstat (limited to 'arch/s390')
63 files changed, 2088 insertions, 1386 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 318fce77601d..7fd08755a1f9 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -26,6 +26,10 @@ config GENERIC_BUG config GENERIC_BUG_RELATIVE_POINTERS def_bool y +config GENERIC_CSUM + bool + default y if KASAN + config GENERIC_LOCKBREAK def_bool y if PREEMPTION @@ -73,6 +77,7 @@ config S390 select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV select ARCH_HAS_MEM_ENCRYPT + select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME select ARCH_HAS_SET_MEMORY @@ -121,6 +126,7 @@ config S390 select ARCH_WANTS_NO_INSTR select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 select DMA_OPS if PCI @@ -196,6 +202,7 @@ config S390 select HAVE_RSEQ select HAVE_SAMPLE_FTRACE_DIRECT select HAVE_SAMPLE_FTRACE_DIRECT_MULTI + select HAVE_SETUP_PER_CPU_AREA select HAVE_SOFTIRQ_ON_OWN_STACK select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING @@ -207,6 +214,7 @@ config S390 select MMU_GATHER_MERGE_VMAS select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE if PCI + select NEED_PER_CPU_EMBED_FIRST_CHUNK select NEED_SG_DMA_LENGTH if PCI select OLD_SIGACTION select OLD_SIGSUSPEND3 @@ -568,8 +576,7 @@ config EXPOLINE_FULL endchoice config RELOCATABLE - bool "Build a relocatable kernel" - default y + def_bool y help This builds a kernel image that retains relocation information so it can be loaded at an arbitrary address. @@ -578,10 +585,11 @@ config RELOCATABLE bootup process. The relocations make the kernel image about 15% larger (compressed 10%), but are discarded at runtime. + Note: this option exists only for documentation purposes, please do + not remove it. config RANDOMIZE_BASE bool "Randomize the address of the kernel image (KASLR)" - depends on RELOCATABLE default y help In support of Kernel Address Space Layout Randomization (KASLR), diff --git a/arch/s390/Makefile b/arch/s390/Makefile index de6d8b2ea4d8..b3235ab0ace8 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -14,10 +14,8 @@ KBUILD_AFLAGS_MODULE += -fPIC KBUILD_CFLAGS_MODULE += -fPIC KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 -ifeq ($(CONFIG_RELOCATABLE),y) KBUILD_CFLAGS += -fPIE LDFLAGS_vmlinux := -pie -endif aflags_dwarf := -Wa,-gdwarf-2 KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ ifndef CONFIG_AS_IS_LLVM diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index d74a4c7d5df6..c0fd29133f27 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -26,8 +26,6 @@ #include <linux/notifier.h> #include <linux/cpu.h> #include <linux/workqueue.h> -#include <linux/suspend.h> -#include <linux/platform_device.h> #include <asm/appldata.h> #include <asm/vtimer.h> #include <linux/uaccess.h> @@ -44,8 +42,6 @@ #define TOD_MICRO 0x01000 /* nr. of TOD clock units for 1 microsecond */ -static struct platform_device *appldata_pdev; - /* * /proc entries (sysctl) */ @@ -88,7 +84,6 @@ static struct vtimer_list appldata_timer; static DEFINE_SPINLOCK(appldata_timer_lock); static int appldata_interval = APPLDATA_CPU_INTERVAL; static int appldata_timer_active; -static int appldata_timer_suspended = 0; /* * Work queue @@ -412,88 +407,6 @@ void appldata_unregister_ops(struct appldata_ops *ops) /********************** module-ops management <END> **************************/ -/**************************** suspend / resume *******************************/ -static int appldata_freeze(struct device *dev) -{ - struct appldata_ops *ops; - int rc; - struct list_head *lh; - - spin_lock(&appldata_timer_lock); - if (appldata_timer_active) { - __appldata_vtimer_setup(APPLDATA_DEL_TIMER); - appldata_timer_suspended = 1; - } - spin_unlock(&appldata_timer_lock); - - mutex_lock(&appldata_ops_mutex); - list_for_each(lh, &appldata_ops_list) { - ops = list_entry(lh, struct appldata_ops, list); - if (ops->active == 1) { - rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC, - (unsigned long) ops->data, ops->size, - ops->mod_lvl); - if (rc != 0) - pr_err("Stopping the data collection for %s " - "failed with rc=%d\n", ops->name, rc); - } - } - mutex_unlock(&appldata_ops_mutex); - return 0; -} - -static int appldata_restore(struct device *dev) -{ - struct appldata_ops *ops; - int rc; - struct list_head *lh; - - spin_lock(&appldata_timer_lock); - if (appldata_timer_suspended) { - __appldata_vtimer_setup(APPLDATA_ADD_TIMER); - appldata_timer_suspended = 0; - } - spin_unlock(&appldata_timer_lock); - - mutex_lock(&appldata_ops_mutex); - list_for_each(lh, &appldata_ops_list) { - ops = list_entry(lh, struct appldata_ops, list); - if (ops->active == 1) { - ops->callback(ops->data); // init record - rc = appldata_diag(ops->record_nr, - APPLDATA_START_INTERVAL_REC, - (unsigned long) ops->data, ops->size, - ops->mod_lvl); - if (rc != 0) { - pr_err("Starting the data collection for %s " - "failed with rc=%d\n", ops->name, rc); - } - } - } - mutex_unlock(&appldata_ops_mutex); - return 0; -} - -static int appldata_thaw(struct device *dev) -{ - return appldata_restore(dev); -} - -static const struct dev_pm_ops appldata_pm_ops = { - .freeze = appldata_freeze, - .thaw = appldata_thaw, - .restore = appldata_restore, -}; - -static struct platform_driver appldata_pdrv = { - .driver = { - .name = "appldata", - .pm = &appldata_pm_ops, - }, -}; -/************************* suspend / resume <END> ****************************/ - - /******************************* init / exit *********************************/ /* @@ -503,36 +416,14 @@ static struct platform_driver appldata_pdrv = { */ static int __init appldata_init(void) { - int rc; - init_virt_timer(&appldata_timer); appldata_timer.function = appldata_timer_function; appldata_timer.data = (unsigned long) &appldata_work; - - rc = platform_driver_register(&appldata_pdrv); - if (rc) - return rc; - - appldata_pdev = platform_device_register_simple("appldata", -1, NULL, - 0); - if (IS_ERR(appldata_pdev)) { - rc = PTR_ERR(appldata_pdev); - goto out_driver; - } appldata_wq = alloc_ordered_workqueue("appldata", 0); - if (!appldata_wq) { - rc = -ENOMEM; - goto out_device; - } - + if (!appldata_wq) + return -ENOMEM; appldata_sysctl_header = register_sysctl_table(appldata_dir_table); return 0; - -out_device: - platform_device_unregister(appldata_pdev); -out_driver: - platform_driver_unregister(&appldata_pdrv); - return rc; } __initcall(appldata_init); diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 883357a211a3..d52c3e2e16bc 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -37,9 +37,8 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o -obj-y += version.o pgm_check_info.o ctype.o ipl_data.o +obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o -obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index ca78d6162245..c1f8f7999fed 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -77,6 +77,9 @@ bool is_ipl_block_dump(void) if (ipl_block.pb0_hdr.pbt == IPL_PBT_NVME && ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP) return true; + if (ipl_block.pb0_hdr.pbt == IPL_PBT_ECKD && + ipl_block.eckd.opt == IPL_PB0_ECKD_OPT_DUMP) + return true; return false; } @@ -108,6 +111,11 @@ static size_t ipl_block_get_ascii_scpdata(char *dest, size_t size, scp_data_len = ipb->nvme.scp_data_len; scp_data = ipb->nvme.scp_data; break; + case IPL_PBT_ECKD: + scp_data_len = ipb->eckd.scp_data_len; + scp_data = ipb->eckd.scp_data; + break; + default: goto out; } @@ -153,6 +161,7 @@ static void append_ipl_block_parm(void) break; case IPL_PBT_FCP: case IPL_PBT_NVME: + case IPL_PBT_ECKD: rc = ipl_block_get_ascii_scpdata( parm, COMMAND_LINE_SIZE - len - 1, &ipl_block); break; diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 6e7f01ca53e6..47ca3264c023 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -291,8 +291,7 @@ void startup_kernel(void) clear_bss_section(); copy_bootdata(); - if (IS_ENABLED(CONFIG_RELOCATABLE)) - handle_relocs(__kaslr_offset); + handle_relocs(__kaslr_offset); if (__kaslr_offset) { /* diff --git a/arch/s390/configs/btf.config b/arch/s390/configs/btf.config new file mode 100644 index 000000000000..39227b4511af --- /dev/null +++ b/arch/s390/configs/btf.config @@ -0,0 +1 @@ +CONFIG_DEBUG_INFO_BTF=y diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 2a827002934b..a7b4e1d82758 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -573,8 +573,6 @@ CONFIG_VIRTIO_CONSOLE=m CONFIG_HW_RANDOM_VIRTIO=m CONFIG_HANGCHECK_TIMER=m CONFIG_TN3270_FS=y -# CONFIG_RANDOM_TRUST_CPU is not set -# CONFIG_RANDOM_TRUST_BOOTLOADER is not set CONFIG_PPS=m # CONFIG_PTP_1588_CLOCK is not set # CONFIG_HWMON is not set @@ -723,52 +721,42 @@ CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m -CONFIG_CRYPTO_GCM=y -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_SEQIV=y -CONFIG_CRYPTO_CFB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_ADIANTUM=m -CONFIG_CRYPTO_HCTR2=m -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRC32_S390=y -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SHA3_256_S390=m -CONFIG_CRYPTO_SHA3_512_S390=m -CONFIG_CRYPTO_SM3_GENERIC=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_HCTR2=m +CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_AEGIS128=m +CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_GCM=y +CONFIG_CRYPTO_SEQIV=y +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3_GENERIC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m @@ -779,6 +767,16 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y +CONFIG_CRYPTO_CRC32_S390=y +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_SHA3_256_S390=m +CONFIG_CRYPTO_SHA3_512_S390=m +CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_CHACHA_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -797,7 +795,6 @@ CONFIG_CMA_SIZE_MBYTES=0 CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_DEBUG_INFO_BTF=y CONFIG_GDB_SCRIPTS=y CONFIG_HEADERS_INSTALL=y CONFIG_DEBUG_SECTION_MISMATCH=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index fb780e80e4c8..2bc2d0fe5774 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -563,8 +563,6 @@ CONFIG_VIRTIO_CONSOLE=m CONFIG_HW_RANDOM_VIRTIO=m CONFIG_HANGCHECK_TIMER=m CONFIG_TN3270_FS=y -# CONFIG_RANDOM_TRUST_CPU is not set -# CONFIG_RANDOM_TRUST_BOOTLOADER is not set # CONFIG_PTP_1588_CLOCK is not set # CONFIG_HWMON is not set CONFIG_WATCHDOG=y @@ -707,53 +705,43 @@ CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m -CONFIG_CRYPTO_GCM=y -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_SEQIV=y -CONFIG_CRYPTO_CFB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_OFB=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_ADIANTUM=m -CONFIG_CRYPTO_HCTR2=m -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRC32_S390=y -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SHA3_256_S390=m -CONFIG_CRYPTO_SHA3_512_S390=m -CONFIG_CRYPTO_SM3_GENERIC=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_HCTR2=m +CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_OFB=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_AEGIS128=m +CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_GCM=y +CONFIG_CRYPTO_SEQIV=y +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3_GENERIC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m @@ -764,6 +752,16 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y +CONFIG_CRYPTO_CRC32_S390=y +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_SHA3_256_S390=m +CONFIG_CRYPTO_SHA3_512_S390=m +CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_CHACHA_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -781,7 +779,6 @@ CONFIG_CMA_SIZE_MBYTES=0 CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_DEBUG_INFO_BTF=y CONFIG_GDB_SCRIPTS=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/s390/configs/kasan.config b/arch/s390/configs/kasan.config new file mode 100644 index 000000000000..700a8b25c3ff --- /dev/null +++ b/arch/s390/configs/kasan.config @@ -0,0 +1,3 @@ +CONFIG_KASAN=y +CONFIG_KASAN_INLINE=y +CONFIG_KASAN_VMALLOC=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index a5576b8d4081..ae14ab0b864d 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -58,7 +58,6 @@ CONFIG_ZFCP=y # CONFIG_VMCP is not set # CONFIG_MONWRITER is not set # CONFIG_S390_VMUR is not set -# CONFIG_RANDOM_TRUST_BOOTLOADER is not set # CONFIG_HID is not set # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set @@ -74,7 +73,6 @@ CONFIG_PRINTK_TIME=y # CONFIG_SYMBOLIC_ERRNAME is not set CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_FS=y CONFIG_PANIC_ON_OOPS=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 6511d15ace45..c3be533c4cd3 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -68,14 +68,6 @@ static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr) return ((struct diag204_x_info_blk_hdr *)hdr)->flags; } -static inline __u16 info_blk_hdr__pcpus(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_info_blk_hdr *)hdr)->phys_cpus; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_info_blk_hdr *)hdr)->phys_cpus; -} - /* Partition header */ static inline int part_hdr__size(enum diag204_format type) diff --git a/arch/s390/include/asm/bugs.h b/arch/s390/include/asm/bugs.h deleted file mode 100644 index aa42a179be33..000000000000 --- a/arch/s390/include/asm/bugs.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * S390 version - * Copyright IBM Corp. 1999 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * Derived from "include/asm-i386/bugs.h" - * Copyright (C) 1994 Linus Torvalds - */ - -/* - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Needs: - * void check_bugs(void); - */ - -static inline void check_bugs(void) -{ - /* s390 has no bugs ... */ -} diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index cdd19d326345..d977a3a2f619 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -12,6 +12,12 @@ #ifndef _S390_CHECKSUM_H #define _S390_CHECKSUM_H +#ifdef CONFIG_GENERIC_CSUM + +#include <asm-generic/checksum.h> + +#else /* CONFIG_GENERIC_CSUM */ + #include <linux/uaccess.h> #include <linux/in6.h> @@ -129,4 +135,5 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, return csum_fold((__force __wsum)(sum >> 32)); } +#endif /* CONFIG_GENERIC_CSUM */ #endif /* _S390_CHECKSUM_H */ diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 6f80ec9c04be..e5c5cb1207e2 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -54,12 +54,33 @@ static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs * return NULL; } -static __always_inline void ftrace_instruction_pointer_set(struct ftrace_regs *fregs, - unsigned long ip) +static __always_inline unsigned long +ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs) +{ + return fregs->regs.psw.addr; +} + +static __always_inline void +ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, + unsigned long ip) { fregs->regs.psw.addr = ip; } +#define ftrace_regs_get_argument(fregs, n) \ + regs_get_kernel_argument(&(fregs)->regs, n) +#define ftrace_regs_get_stack_pointer(fregs) \ + kernel_stack_pointer(&(fregs)->regs) +#define ftrace_regs_return_value(fregs) \ + regs_return_value(&(fregs)->regs) +#define ftrace_regs_set_return_value(fregs, ret) \ + regs_set_return_value(&(fregs)->regs, ret) +#define ftrace_override_function_with_return(fregs) \ + override_function_with_return(&(fregs)->regs) +#define ftrace_regs_query_register_offset(name) \ + regs_query_register_offset(name) + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS /* * When an ftrace registered caller is tracing a function that is * also set by a register_ftrace_direct() call, it needs to be @@ -67,10 +88,12 @@ static __always_inline void ftrace_instruction_pointer_set(struct ftrace_regs *f * place the direct caller in the ORIG_GPR2 part of pt_regs. This * tells the ftrace_caller that there's a direct caller. */ -static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) +static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr) { + struct pt_regs *regs = &fregs->regs; regs->orig_gpr2 = addr; } +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ /* * Even though the system call numbers are identical for s390/s390x a diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index a405b6bb89fb..b0d00032479d 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -22,6 +22,7 @@ struct ipl_parameter_block { struct ipl_pb0_common common; struct ipl_pb0_fcp fcp; struct ipl_pb0_ccw ccw; + struct ipl_pb0_eckd eckd; struct ipl_pb0_nvme nvme; char raw[PAGE_SIZE - sizeof(struct ipl_pl_hdr)]; }; @@ -41,6 +42,10 @@ struct ipl_parameter_block { sizeof(struct ipl_pb0_ccw)) #define IPL_BP0_CCW_LEN (sizeof(struct ipl_pb0_ccw)) +#define IPL_BP_ECKD_LEN (sizeof(struct ipl_pl_hdr) + \ + sizeof(struct ipl_pb0_eckd)) +#define IPL_BP0_ECKD_LEN (sizeof(struct ipl_pb0_eckd)) + #define IPL_MAX_SUPPORTED_VERSION (0) #define IPL_RB_CERT_UNKNOWN ((unsigned short)-1) @@ -68,6 +73,8 @@ enum ipl_type { IPL_TYPE_NSS = 16, IPL_TYPE_NVME = 32, IPL_TYPE_NVME_DUMP = 64, + IPL_TYPE_ECKD = 128, + IPL_TYPE_ECKD_DUMP = 256, }; struct ipl_info @@ -79,6 +86,9 @@ struct ipl_info } ccw; struct { struct ccw_dev_id dev_id; + } eckd; + struct { + struct ccw_dev_id dev_id; u64 wwpn; u64 lun; } fcp; @@ -99,6 +109,7 @@ extern void set_os_info_reipl_block(void); static inline bool is_ipl_type_dump(void) { return (ipl_info.type == IPL_TYPE_FCP_DUMP) || + (ipl_info.type == IPL_TYPE_ECKD_DUMP) || (ipl_info.type == IPL_TYPE_NVME_DUMP); } diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index b1e98a9ed152..d67ce719d16a 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -142,8 +142,7 @@ struct mcck_volatile_info { CR14_EXTERNAL_DAMAGE_SUBMASK) #define SIDAD_SIZE_MASK 0xff -#define sida_origin(sie_block) \ - ((sie_block)->sidad & PAGE_MASK) +#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK) #define sida_size(sie_block) \ ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE) @@ -276,6 +275,7 @@ struct kvm_s390_sie_block { #define ECB3_AES 0x04 #define ECB3_RI 0x01 __u8 ecb3; /* 0x0063 */ +#define ESCA_SCAOL_MASK ~0x3fU __u32 scaol; /* 0x0064 */ __u8 sdf; /* 0x0068 */ __u8 epdx; /* 0x0069 */ @@ -942,6 +942,8 @@ struct kvm_s390_pv { unsigned long stor_base; void *stor_var; bool dumping; + void *set_aside; + struct list_head need_cleanup; struct mmu_notifier mmu_notifier; }; @@ -1017,7 +1019,13 @@ void kvm_arch_crypto_clear_masks(struct kvm *kvm); void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, unsigned long *aqm, unsigned long *adm); -extern int sie64a(struct kvm_s390_sie_block *, u64 *); +int __sie64a(phys_addr_t sie_block_phys, struct kvm_s390_sie_block *sie_block, u64 *rsa); + +static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa) +{ + return __sie64a(virt_to_phys(sie_block), sie_block, rsa); +} + extern char sie_exit; extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc); diff --git a/arch/s390/include/asm/mem_encrypt.h b/arch/s390/include/asm/mem_encrypt.h index 08a8b96606d7..b85e13505a0f 100644 --- a/arch/s390/include/asm/mem_encrypt.h +++ b/arch/s390/include/asm/mem_encrypt.h @@ -4,8 +4,8 @@ #ifndef __ASSEMBLY__ -int set_memory_encrypted(unsigned long addr, int numpages); -int set_memory_decrypted(unsigned long addr, int numpages); +int set_memory_encrypted(unsigned long vaddr, int numpages); +int set_memory_decrypted(unsigned long vaddr, int numpages); #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h index 1a8a6b15d121..7d1888e3dee6 100644 --- a/arch/s390/include/asm/pai.h +++ b/arch/s390/include/asm/pai.h @@ -75,4 +75,10 @@ static __always_inline void pai_kernel_exit(struct pt_regs *regs) WRITE_ONCE(S390_lowcore.ccd, S390_lowcore.ccd & ~PAI_CRYPTO_KERNEL_OFFSET); } +enum paievt_mode { + PAI_MODE_NONE, + PAI_MODE_SAMPLING, + PAI_MODE_COUNTING, +}; + #endif diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 108e732d7b14..b248694e0024 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -117,7 +117,9 @@ struct zpci_bus { struct zpci_dev { struct zpci_bus *zbus; struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */ + struct list_head iommu_list; struct kref kref; + struct rcu_head rcu; struct hotplug_slot hotplug_slot; enum zpci_state state; @@ -155,7 +157,6 @@ struct zpci_dev { /* DMA stuff */ unsigned long *dma_table; - spinlock_t dma_table_lock; int tlb_refresh; spinlock_t iommu_bitmap_lock; @@ -220,7 +221,7 @@ void zpci_device_reserved(struct zpci_dev *zdev); bool zpci_is_device_configured(struct zpci_dev *zdev); int zpci_hot_reset_device(struct zpci_dev *zdev); -int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); +int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64, u8 *); int zpci_unregister_ioat(struct zpci_dev *, u8); void zpci_remove_reserved_devices(void); void zpci_update_fh(struct zpci_dev *zdev, u32 fh); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index f1cb9391190d..b26cbf1c533c 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -763,6 +763,7 @@ static inline int pmd_dirty(pmd_t pmd) return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; @@ -1773,8 +1774,6 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#define kern_addr_valid(addr) (1) - extern int vmem_add_mapping(unsigned long start, unsigned long size); extern void vmem_remove_mapping(unsigned long start, unsigned long size); extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc); diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 87be3e855bf7..c907f747d2a0 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -199,7 +199,16 @@ unsigned long __get_wchan(struct task_struct *p); /* Has task runtime instrumentation enabled ? */ #define is_ri_task(tsk) (!!(tsk)->thread.ri_cb) -register unsigned long current_stack_pointer asm("r15"); +/* avoid using global register due to gcc bug in versions < 8.4 */ +#define current_stack_pointer (__current_stack_pointer()) + +static __always_inline unsigned long __current_stack_pointer(void) +{ + unsigned long sp; + + asm volatile("lgr %0,15" : "=d" (sp)); + return sp; +} static __always_inline unsigned short stap(void) { diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 9d4c7f71e070..dac7da88f61f 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -87,6 +87,7 @@ struct sclp_info { unsigned char has_gisaf : 1; unsigned char has_diag318 : 1; unsigned char has_sipl : 1; + unsigned char has_sipl_eckd : 1; unsigned char has_dirq : 1; unsigned char has_iplcc : 1; unsigned char has_zpci_lsi : 1; @@ -131,6 +132,7 @@ void sclp_early_get_ipl_info(struct sclp_ipl_info *info); void sclp_early_detect(void); void sclp_early_printk(const char *s); void __sclp_early_printk(const char *s, unsigned int len); +void sclp_emergency_printk(const char *s); int sclp_early_get_memsize(unsigned long *mem); int sclp_early_get_hsa_size(unsigned long *hsa_size); diff --git a/arch/s390/include/asm/serial.h b/arch/s390/include/asm/serial.h deleted file mode 100644 index aaf85a69061c..000000000000 --- a/arch/s390/include/asm/serial.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_S390_SERIAL_H -#define _ASM_S390_SERIAL_H - -#define BASE_BAUD 0 - -#endif /* _ASM_S390_SERIAL_H */ diff --git a/arch/s390/include/asm/shmparam.h b/arch/s390/include/asm/shmparam.h deleted file mode 100644 index e75d45649c54..000000000000 --- a/arch/s390/include/asm/shmparam.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * S390 version - * - * Derived from "include/asm-i386/shmparam.h" - */ -#ifndef _ASM_S390_SHMPARAM_H -#define _ASM_S390_SHMPARAM_H - -#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ - -#endif /* _ASM_S390_SHMPARAM_H */ diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index b23c658dce77..1802be5abb5d 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -46,6 +46,7 @@ struct stack_frame { unsigned long sie_savearea; unsigned long sie_reason; unsigned long sie_flags; + unsigned long sie_control_block_phys; }; }; unsigned long gprs[10]; diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 3a5c8fb590e5..b91f4a9b044c 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -25,7 +25,8 @@ void __tlb_remove_table(void *_table); static inline void tlb_flush(struct mmu_gather *tlb); static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, - struct page *page, int page_size); + struct encoded_page *page, + int page_size); #define tlb_flush tlb_flush #define pte_free_tlb pte_free_tlb @@ -40,11 +41,15 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, * Release the page cache reference for a pte removed by * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page * has already been freed, so just do free_page_and_swap_cache. + * + * s390 doesn't delay rmap removal, so there is nothing encoded in + * the page pointer. */ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, - struct page *page, int page_size) + struct encoded_page *page, + int page_size) { - free_page_and_swap_cache(page); + free_page_and_swap_cache(encoded_page_ptr(page)); return false; } diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index be3ef9dd6972..28a9ad57b6f1 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -34,6 +34,7 @@ #define UVC_CMD_INIT_UV 0x000f #define UVC_CMD_CREATE_SEC_CONF 0x0100 #define UVC_CMD_DESTROY_SEC_CONF 0x0101 +#define UVC_CMD_DESTROY_SEC_CONF_FAST 0x0102 #define UVC_CMD_CREATE_SEC_CPU 0x0120 #define UVC_CMD_DESTROY_SEC_CPU 0x0121 #define UVC_CMD_CONV_TO_SEC_STOR 0x0200 @@ -81,6 +82,7 @@ enum uv_cmds_inst { BIT_UVC_CMD_UNSHARE_ALL = 20, BIT_UVC_CMD_PIN_PAGE_SHARED = 21, BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22, + BIT_UVC_CMD_DESTROY_SEC_CONF_FAST = 23, BIT_UVC_CMD_DUMP_INIT = 24, BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE = 25, BIT_UVC_CMD_DUMP_CPU = 26, @@ -230,6 +232,14 @@ struct uv_cb_nodata { u64 reserved20[4]; } __packed __aligned(8); +/* Destroy Configuration Fast */ +struct uv_cb_destroy_fast { + struct uv_cb_header header; + u64 reserved08[2]; + u64 handle; + u64 reserved20[5]; +} __packed __aligned(8); + /* Set Shared Access */ struct uv_cb_share { struct uv_cb_header header; diff --git a/arch/s390/include/asm/vga.h b/arch/s390/include/asm/vga.h deleted file mode 100644 index 605dc46bac5e..000000000000 --- a/arch/s390/include/asm/vga.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_S390_VGA_H -#define _ASM_S390_VGA_H - -/* Avoid compile errors due to missing asm/vga.h */ - -#endif /* _ASM_S390_VGA_H */ diff --git a/arch/s390/include/asm/vx-insn-asm.h b/arch/s390/include/asm/vx-insn-asm.h new file mode 100644 index 000000000000..360f8b36d962 --- /dev/null +++ b/arch/s390/include/asm/vx-insn-asm.h @@ -0,0 +1,681 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Support for Vector Instructions + * + * Assembler macros to generate .byte/.word code for particular + * vector instructions that are supported by recent binutils (>= 2.26) only. + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ + +#ifndef __ASM_S390_VX_INSN_INTERNAL_H +#define __ASM_S390_VX_INSN_INTERNAL_H + +#ifndef __ASM_S390_VX_INSN_H +#error only <asm/vx-insn.h> can be included directly +#endif + +#ifdef __ASSEMBLY__ + +/* Macros to generate vector instruction byte code */ + +/* GR_NUM - Retrieve general-purpose register number + * + * @opd: Operand to store register number + * @r64: String designation register in the format "%rN" + */ +.macro GR_NUM opd gr + \opd = 255 + .ifc \gr,%r0 + \opd = 0 + .endif + .ifc \gr,%r1 + \opd = 1 + .endif + .ifc \gr,%r2 + \opd = 2 + .endif + .ifc \gr,%r3 + \opd = 3 + .endif + .ifc \gr,%r4 + \opd = 4 + .endif + .ifc \gr,%r5 + \opd = 5 + .endif + .ifc \gr,%r6 + \opd = 6 + .endif + .ifc \gr,%r7 + \opd = 7 + .endif + .ifc \gr,%r8 + \opd = 8 + .endif + .ifc \gr,%r9 + \opd = 9 + .endif + .ifc \gr,%r10 + \opd = 10 + .endif + .ifc \gr,%r11 + \opd = 11 + .endif + .ifc \gr,%r12 + \opd = 12 + .endif + .ifc \gr,%r13 + \opd = 13 + .endif + .ifc \gr,%r14 + \opd = 14 + .endif + .ifc \gr,%r15 + \opd = 15 + .endif + .if \opd == 255 + \opd = \gr + .endif +.endm + +/* VX_NUM - Retrieve vector register number + * + * @opd: Operand to store register number + * @vxr: String designation register in the format "%vN" + * + * The vector register number is used for as input number to the + * instruction and, as well as, to compute the RXB field of the + * instruction. + */ +.macro VX_NUM opd vxr + \opd = 255 + .ifc \vxr,%v0 + \opd = 0 + .endif + .ifc \vxr,%v1 + \opd = 1 + .endif + .ifc \vxr,%v2 + \opd = 2 + .endif + .ifc \vxr,%v3 + \opd = 3 + .endif + .ifc \vxr,%v4 + \opd = 4 + .endif + .ifc \vxr,%v5 + \opd = 5 + .endif + .ifc \vxr,%v6 + \opd = 6 + .endif + .ifc \vxr,%v7 + \opd = 7 + .endif + .ifc \vxr,%v8 + \opd = 8 + .endif + .ifc \vxr,%v9 + \opd = 9 + .endif + .ifc \vxr,%v10 + \opd = 10 + .endif + .ifc \vxr,%v11 + \opd = 11 + .endif + .ifc \vxr,%v12 + \opd = 12 + .endif + .ifc \vxr,%v13 + \opd = 13 + .endif + .ifc \vxr,%v14 + \opd = 14 + .endif + .ifc \vxr,%v15 + \opd = 15 + .endif + .ifc \vxr,%v16 + \opd = 16 + .endif + .ifc \vxr,%v17 + \opd = 17 + .endif + .ifc \vxr,%v18 + \opd = 18 + .endif + .ifc \vxr,%v19 + \opd = 19 + .endif + .ifc \vxr,%v20 + \opd = 20 + .endif + .ifc \vxr,%v21 + \opd = 21 + .endif + .ifc \vxr,%v22 + \opd = 22 + .endif + .ifc \vxr,%v23 + \opd = 23 + .endif + .ifc \vxr,%v24 + \opd = 24 + .endif + .ifc \vxr,%v25 + \opd = 25 + .endif + .ifc \vxr,%v26 + \opd = 26 + .endif + .ifc \vxr,%v27 + \opd = 27 + .endif + .ifc \vxr,%v28 + \opd = 28 + .endif + .ifc \vxr,%v29 + \opd = 29 + .endif + .ifc \vxr,%v30 + \opd = 30 + .endif + .ifc \vxr,%v31 + \opd = 31 + .endif + .if \opd == 255 + \opd = \vxr + .endif +.endm + +/* RXB - Compute most significant bit used vector registers + * + * @rxb: Operand to store computed RXB value + * @v1: First vector register designated operand + * @v2: Second vector register designated operand + * @v3: Third vector register designated operand + * @v4: Fourth vector register designated operand + */ +.macro RXB rxb v1 v2=0 v3=0 v4=0 + \rxb = 0 + .if \v1 & 0x10 + \rxb = \rxb | 0x08 + .endif + .if \v2 & 0x10 + \rxb = \rxb | 0x04 + .endif + .if \v3 & 0x10 + \rxb = \rxb | 0x02 + .endif + .if \v4 & 0x10 + \rxb = \rxb | 0x01 + .endif +.endm + +/* MRXB - Generate Element Size Control and RXB value + * + * @m: Element size control + * @v1: First vector register designated operand (for RXB) + * @v2: Second vector register designated operand (for RXB) + * @v3: Third vector register designated operand (for RXB) + * @v4: Fourth vector register designated operand (for RXB) + */ +.macro MRXB m v1 v2=0 v3=0 v4=0 + rxb = 0 + RXB rxb, \v1, \v2, \v3, \v4 + .byte (\m << 4) | rxb +.endm + +/* MRXBOPC - Generate Element Size Control, RXB, and final Opcode fields + * + * @m: Element size control + * @opc: Opcode + * @v1: First vector register designated operand (for RXB) + * @v2: Second vector register designated operand (for RXB) + * @v3: Third vector register designated operand (for RXB) + * @v4: Fourth vector register designated operand (for RXB) + */ +.macro MRXBOPC m opc v1 v2=0 v3=0 v4=0 + MRXB \m, \v1, \v2, \v3, \v4 + .byte \opc +.endm + +/* Vector support instructions */ + +/* VECTOR GENERATE BYTE MASK */ +.macro VGBM vr imm2 + VX_NUM v1, \vr + .word (0xE700 | ((v1&15) << 4)) + .word \imm2 + MRXBOPC 0, 0x44, v1 +.endm +.macro VZERO vxr + VGBM \vxr, 0 +.endm +.macro VONE vxr + VGBM \vxr, 0xFFFF +.endm + +/* VECTOR LOAD VR ELEMENT FROM GR */ +.macro VLVG v, gr, disp, m + VX_NUM v1, \v + GR_NUM b2, "%r0" + GR_NUM r3, \gr + .word 0xE700 | ((v1&15) << 4) | r3 + .word (b2 << 12) | (\disp) + MRXBOPC \m, 0x22, v1 +.endm +.macro VLVGB v, gr, index, base + VLVG \v, \gr, \index, \base, 0 +.endm +.macro VLVGH v, gr, index + VLVG \v, \gr, \index, 1 +.endm +.macro VLVGF v, gr, index + VLVG \v, \gr, \index, 2 +.endm +.macro VLVGG v, gr, index + VLVG \v, \gr, \index, 3 +.endm + +/* VECTOR LOAD REGISTER */ +.macro VLR v1, v2 + VX_NUM v1, \v1 + VX_NUM v2, \v2 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word 0 + MRXBOPC 0, 0x56, v1, v2 +.endm + +/* VECTOR LOAD */ +.macro VL v, disp, index="%r0", base + VX_NUM v1, \v + GR_NUM x2, \index + GR_NUM b2, \base + .word 0xE700 | ((v1&15) << 4) | x2 + .word (b2 << 12) | (\disp) + MRXBOPC 0, 0x06, v1 +.endm + +/* VECTOR LOAD ELEMENT */ +.macro VLEx vr1, disp, index="%r0", base, m3, opc + VX_NUM v1, \vr1 + GR_NUM x2, \index + GR_NUM b2, \base + .word 0xE700 | ((v1&15) << 4) | x2 + .word (b2 << 12) | (\disp) + MRXBOPC \m3, \opc, v1 +.endm +.macro VLEB vr1, disp, index="%r0", base, m3 + VLEx \vr1, \disp, \index, \base, \m3, 0x00 +.endm +.macro VLEH vr1, disp, index="%r0", base, m3 + VLEx \vr1, \disp, \index, \base, \m3, 0x01 +.endm +.macro VLEF vr1, disp, index="%r0", base, m3 + VLEx \vr1, \disp, \index, \base, \m3, 0x03 +.endm +.macro VLEG vr1, disp, index="%r0", base, m3 + VLEx \vr1, \disp, \index, \base, \m3, 0x02 +.endm + +/* VECTOR LOAD ELEMENT IMMEDIATE */ +.macro VLEIx vr1, imm2, m3, opc + VX_NUM v1, \vr1 + .word 0xE700 | ((v1&15) << 4) + .word \imm2 + MRXBOPC \m3, \opc, v1 +.endm +.macro VLEIB vr1, imm2, index + VLEIx \vr1, \imm2, \index, 0x40 +.endm +.macro VLEIH vr1, imm2, index + VLEIx \vr1, \imm2, \index, 0x41 +.endm +.macro VLEIF vr1, imm2, index + VLEIx \vr1, \imm2, \index, 0x43 +.endm +.macro VLEIG vr1, imm2, index + VLEIx \vr1, \imm2, \index, 0x42 +.endm + +/* VECTOR LOAD GR FROM VR ELEMENT */ +.macro VLGV gr, vr, disp, base="%r0", m + GR_NUM r1, \gr + GR_NUM b2, \base + VX_NUM v3, \vr + .word 0xE700 | (r1 << 4) | (v3&15) + .word (b2 << 12) | (\disp) + MRXBOPC \m, 0x21, v3 +.endm +.macro VLGVB gr, vr, disp, base="%r0" + VLGV \gr, \vr, \disp, \base, 0 +.endm +.macro VLGVH gr, vr, disp, base="%r0" + VLGV \gr, \vr, \disp, \base, 1 +.endm +.macro VLGVF gr, vr, disp, base="%r0" + VLGV \gr, \vr, \disp, \base, 2 +.endm +.macro VLGVG gr, vr, disp, base="%r0" + VLGV \gr, \vr, \disp, \base, 3 +.endm + +/* VECTOR LOAD MULTIPLE */ +.macro VLM vfrom, vto, disp, base, hint=3 + VX_NUM v1, \vfrom + VX_NUM v3, \vto + GR_NUM b2, \base + .word 0xE700 | ((v1&15) << 4) | (v3&15) + .word (b2 << 12) | (\disp) + MRXBOPC \hint, 0x36, v1, v3 +.endm + +/* VECTOR STORE */ +.macro VST vr1, disp, index="%r0", base + VX_NUM v1, \vr1 + GR_NUM x2, \index + GR_NUM b2, \base + .word 0xE700 | ((v1&15) << 4) | (x2&15) + .word (b2 << 12) | (\disp) + MRXBOPC 0, 0x0E, v1 +.endm + +/* VECTOR STORE MULTIPLE */ +.macro VSTM vfrom, vto, disp, base, hint=3 + VX_NUM v1, \vfrom + VX_NUM v3, \vto + GR_NUM b2, \base + .word 0xE700 | ((v1&15) << 4) | (v3&15) + .word (b2 << 12) | (\disp) + MRXBOPC \hint, 0x3E, v1, v3 +.endm + +/* VECTOR PERMUTE */ +.macro VPERM vr1, vr2, vr3, vr4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + VX_NUM v4, \vr4 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC (v4&15), 0x8C, v1, v2, v3, v4 +.endm + +/* VECTOR UNPACK LOGICAL LOW */ +.macro VUPLL vr1, vr2, m3 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word 0x0000 + MRXBOPC \m3, 0xD4, v1, v2 +.endm +.macro VUPLLB vr1, vr2 + VUPLL \vr1, \vr2, 0 +.endm +.macro VUPLLH vr1, vr2 + VUPLL \vr1, \vr2, 1 +.endm +.macro VUPLLF vr1, vr2 + VUPLL \vr1, \vr2, 2 +.endm + +/* VECTOR PERMUTE DOUBLEWORD IMMEDIATE */ +.macro VPDI vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0x84, v1, v2, v3 +.endm + +/* VECTOR REPLICATE */ +.macro VREP vr1, vr3, imm2, m4 + VX_NUM v1, \vr1 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v3&15) + .word \imm2 + MRXBOPC \m4, 0x4D, v1, v3 +.endm +.macro VREPB vr1, vr3, imm2 + VREP \vr1, \vr3, \imm2, 0 +.endm +.macro VREPH vr1, vr3, imm2 + VREP \vr1, \vr3, \imm2, 1 +.endm +.macro VREPF vr1, vr3, imm2 + VREP \vr1, \vr3, \imm2, 2 +.endm +.macro VREPG vr1, vr3, imm2 + VREP \vr1, \vr3, \imm2, 3 +.endm + +/* VECTOR MERGE HIGH */ +.macro VMRH vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0x61, v1, v2, v3 +.endm +.macro VMRHB vr1, vr2, vr3 + VMRH \vr1, \vr2, \vr3, 0 +.endm +.macro VMRHH vr1, vr2, vr3 + VMRH \vr1, \vr2, \vr3, 1 +.endm +.macro VMRHF vr1, vr2, vr3 + VMRH \vr1, \vr2, \vr3, 2 +.endm +.macro VMRHG vr1, vr2, vr3 + VMRH \vr1, \vr2, \vr3, 3 +.endm + +/* VECTOR MERGE LOW */ +.macro VMRL vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0x60, v1, v2, v3 +.endm +.macro VMRLB vr1, vr2, vr3 + VMRL \vr1, \vr2, \vr3, 0 +.endm +.macro VMRLH vr1, vr2, vr3 + VMRL \vr1, \vr2, \vr3, 1 +.endm +.macro VMRLF vr1, vr2, vr3 + VMRL \vr1, \vr2, \vr3, 2 +.endm +.macro VMRLG vr1, vr2, vr3 + VMRL \vr1, \vr2, \vr3, 3 +.endm + + +/* Vector integer instructions */ + +/* VECTOR AND */ +.macro VN vr1, vr2, vr3 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC 0, 0x68, v1, v2, v3 +.endm + +/* VECTOR EXCLUSIVE OR */ +.macro VX vr1, vr2, vr3 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC 0, 0x6D, v1, v2, v3 +.endm + +/* VECTOR GALOIS FIELD MULTIPLY SUM */ +.macro VGFM vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0xB4, v1, v2, v3 +.endm +.macro VGFMB vr1, vr2, vr3 + VGFM \vr1, \vr2, \vr3, 0 +.endm +.macro VGFMH vr1, vr2, vr3 + VGFM \vr1, \vr2, \vr3, 1 +.endm +.macro VGFMF vr1, vr2, vr3 + VGFM \vr1, \vr2, \vr3, 2 +.endm +.macro VGFMG vr1, vr2, vr3 + VGFM \vr1, \vr2, \vr3, 3 +.endm + +/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */ +.macro VGFMA vr1, vr2, vr3, vr4, m5 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + VX_NUM v4, \vr4 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) | (\m5 << 8) + MRXBOPC (v4&15), 0xBC, v1, v2, v3, v4 +.endm +.macro VGFMAB vr1, vr2, vr3, vr4 + VGFMA \vr1, \vr2, \vr3, \vr4, 0 +.endm +.macro VGFMAH vr1, vr2, vr3, vr4 + VGFMA \vr1, \vr2, \vr3, \vr4, 1 +.endm +.macro VGFMAF vr1, vr2, vr3, vr4 + VGFMA \vr1, \vr2, \vr3, \vr4, 2 +.endm +.macro VGFMAG vr1, vr2, vr3, vr4 + VGFMA \vr1, \vr2, \vr3, \vr4, 3 +.endm + +/* VECTOR SHIFT RIGHT LOGICAL BY BYTE */ +.macro VSRLB vr1, vr2, vr3 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC 0, 0x7D, v1, v2, v3 +.endm + +/* VECTOR REPLICATE IMMEDIATE */ +.macro VREPI vr1, imm2, m3 + VX_NUM v1, \vr1 + .word 0xE700 | ((v1&15) << 4) + .word \imm2 + MRXBOPC \m3, 0x45, v1 +.endm +.macro VREPIB vr1, imm2 + VREPI \vr1, \imm2, 0 +.endm +.macro VREPIH vr1, imm2 + VREPI \vr1, \imm2, 1 +.endm +.macro VREPIF vr1, imm2 + VREPI \vr1, \imm2, 2 +.endm +.macro VREPIG vr1, imm2 + VREP \vr1, \imm2, 3 +.endm + +/* VECTOR ADD */ +.macro VA vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0xF3, v1, v2, v3 +.endm +.macro VAB vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 0 +.endm +.macro VAH vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 1 +.endm +.macro VAF vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 2 +.endm +.macro VAG vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 3 +.endm +.macro VAQ vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 4 +.endm + +/* VECTOR ELEMENT SHIFT RIGHT ARITHMETIC */ +.macro VESRAV vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0x7A, v1, v2, v3 +.endm + +.macro VESRAVB vr1, vr2, vr3 + VESRAV \vr1, \vr2, \vr3, 0 +.endm +.macro VESRAVH vr1, vr2, vr3 + VESRAV \vr1, \vr2, \vr3, 1 +.endm +.macro VESRAVF vr1, vr2, vr3 + VESRAV \vr1, \vr2, \vr3, 2 +.endm +.macro VESRAVG vr1, vr2, vr3 + VESRAV \vr1, \vr2, \vr3, 3 +.endm + +/* VECTOR ELEMENT ROTATE LEFT LOGICAL */ +.macro VERLL vr1, vr3, disp, base="%r0", m4 + VX_NUM v1, \vr1 + VX_NUM v3, \vr3 + GR_NUM b2, \base + .word 0xE700 | ((v1&15) << 4) | (v3&15) + .word (b2 << 12) | (\disp) + MRXBOPC \m4, 0x33, v1, v3 +.endm +.macro VERLLB vr1, vr3, disp, base="%r0" + VERLL \vr1, \vr3, \disp, \base, 0 +.endm +.macro VERLLH vr1, vr3, disp, base="%r0" + VERLL \vr1, \vr3, \disp, \base, 1 +.endm +.macro VERLLF vr1, vr3, disp, base="%r0" + VERLL \vr1, \vr3, \disp, \base, 2 +.endm +.macro VERLLG vr1, vr3, disp, base="%r0" + VERLL \vr1, \vr3, \disp, \base, 3 +.endm + +/* VECTOR SHIFT LEFT DOUBLE BY BYTE */ +.macro VSLDB vr1, vr2, vr3, imm4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) | (\imm4) + MRXBOPC 0, 0x77, v1, v2, v3 +.endm + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_S390_VX_INSN_INTERNAL_H */ diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h index 95480ed9149e..8c188f1c6d27 100644 --- a/arch/s390/include/asm/vx-insn.h +++ b/arch/s390/include/asm/vx-insn.h @@ -2,677 +2,18 @@ /* * Support for Vector Instructions * - * Assembler macros to generate .byte/.word code for particular - * vector instructions that are supported by recent binutils (>= 2.26) only. - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + * This wrapper header file allows to use the vector instruction macros in + * both assembler files as well as in inline assemblies in C files. */ #ifndef __ASM_S390_VX_INSN_H #define __ASM_S390_VX_INSN_H -#ifdef __ASSEMBLY__ - - -/* Macros to generate vector instruction byte code */ - -/* GR_NUM - Retrieve general-purpose register number - * - * @opd: Operand to store register number - * @r64: String designation register in the format "%rN" - */ -.macro GR_NUM opd gr - \opd = 255 - .ifc \gr,%r0 - \opd = 0 - .endif - .ifc \gr,%r1 - \opd = 1 - .endif - .ifc \gr,%r2 - \opd = 2 - .endif - .ifc \gr,%r3 - \opd = 3 - .endif - .ifc \gr,%r4 - \opd = 4 - .endif - .ifc \gr,%r5 - \opd = 5 - .endif - .ifc \gr,%r6 - \opd = 6 - .endif - .ifc \gr,%r7 - \opd = 7 - .endif - .ifc \gr,%r8 - \opd = 8 - .endif - .ifc \gr,%r9 - \opd = 9 - .endif - .ifc \gr,%r10 - \opd = 10 - .endif - .ifc \gr,%r11 - \opd = 11 - .endif - .ifc \gr,%r12 - \opd = 12 - .endif - .ifc \gr,%r13 - \opd = 13 - .endif - .ifc \gr,%r14 - \opd = 14 - .endif - .ifc \gr,%r15 - \opd = 15 - .endif - .if \opd == 255 - \opd = \gr - .endif -.endm - -/* VX_NUM - Retrieve vector register number - * - * @opd: Operand to store register number - * @vxr: String designation register in the format "%vN" - * - * The vector register number is used for as input number to the - * instruction and, as well as, to compute the RXB field of the - * instruction. - */ -.macro VX_NUM opd vxr - \opd = 255 - .ifc \vxr,%v0 - \opd = 0 - .endif - .ifc \vxr,%v1 - \opd = 1 - .endif - .ifc \vxr,%v2 - \opd = 2 - .endif - .ifc \vxr,%v3 - \opd = 3 - .endif - .ifc \vxr,%v4 - \opd = 4 - .endif - .ifc \vxr,%v5 - \opd = 5 - .endif - .ifc \vxr,%v6 - \opd = 6 - .endif - .ifc \vxr,%v7 - \opd = 7 - .endif - .ifc \vxr,%v8 - \opd = 8 - .endif - .ifc \vxr,%v9 - \opd = 9 - .endif - .ifc \vxr,%v10 - \opd = 10 - .endif - .ifc \vxr,%v11 - \opd = 11 - .endif - .ifc \vxr,%v12 - \opd = 12 - .endif - .ifc \vxr,%v13 - \opd = 13 - .endif - .ifc \vxr,%v14 - \opd = 14 - .endif - .ifc \vxr,%v15 - \opd = 15 - .endif - .ifc \vxr,%v16 - \opd = 16 - .endif - .ifc \vxr,%v17 - \opd = 17 - .endif - .ifc \vxr,%v18 - \opd = 18 - .endif - .ifc \vxr,%v19 - \opd = 19 - .endif - .ifc \vxr,%v20 - \opd = 20 - .endif - .ifc \vxr,%v21 - \opd = 21 - .endif - .ifc \vxr,%v22 - \opd = 22 - .endif - .ifc \vxr,%v23 - \opd = 23 - .endif - .ifc \vxr,%v24 - \opd = 24 - .endif - .ifc \vxr,%v25 - \opd = 25 - .endif - .ifc \vxr,%v26 - \opd = 26 - .endif - .ifc \vxr,%v27 - \opd = 27 - .endif - .ifc \vxr,%v28 - \opd = 28 - .endif - .ifc \vxr,%v29 - \opd = 29 - .endif - .ifc \vxr,%v30 - \opd = 30 - .endif - .ifc \vxr,%v31 - \opd = 31 - .endif - .if \opd == 255 - \opd = \vxr - .endif -.endm - -/* RXB - Compute most significant bit used vector registers - * - * @rxb: Operand to store computed RXB value - * @v1: First vector register designated operand - * @v2: Second vector register designated operand - * @v3: Third vector register designated operand - * @v4: Fourth vector register designated operand - */ -.macro RXB rxb v1 v2=0 v3=0 v4=0 - \rxb = 0 - .if \v1 & 0x10 - \rxb = \rxb | 0x08 - .endif - .if \v2 & 0x10 - \rxb = \rxb | 0x04 - .endif - .if \v3 & 0x10 - \rxb = \rxb | 0x02 - .endif - .if \v4 & 0x10 - \rxb = \rxb | 0x01 - .endif -.endm - -/* MRXB - Generate Element Size Control and RXB value - * - * @m: Element size control - * @v1: First vector register designated operand (for RXB) - * @v2: Second vector register designated operand (for RXB) - * @v3: Third vector register designated operand (for RXB) - * @v4: Fourth vector register designated operand (for RXB) - */ -.macro MRXB m v1 v2=0 v3=0 v4=0 - rxb = 0 - RXB rxb, \v1, \v2, \v3, \v4 - .byte (\m << 4) | rxb -.endm - -/* MRXBOPC - Generate Element Size Control, RXB, and final Opcode fields - * - * @m: Element size control - * @opc: Opcode - * @v1: First vector register designated operand (for RXB) - * @v2: Second vector register designated operand (for RXB) - * @v3: Third vector register designated operand (for RXB) - * @v4: Fourth vector register designated operand (for RXB) - */ -.macro MRXBOPC m opc v1 v2=0 v3=0 v4=0 - MRXB \m, \v1, \v2, \v3, \v4 - .byte \opc -.endm - -/* Vector support instructions */ - -/* VECTOR GENERATE BYTE MASK */ -.macro VGBM vr imm2 - VX_NUM v1, \vr - .word (0xE700 | ((v1&15) << 4)) - .word \imm2 - MRXBOPC 0, 0x44, v1 -.endm -.macro VZERO vxr - VGBM \vxr, 0 -.endm -.macro VONE vxr - VGBM \vxr, 0xFFFF -.endm - -/* VECTOR LOAD VR ELEMENT FROM GR */ -.macro VLVG v, gr, disp, m - VX_NUM v1, \v - GR_NUM b2, "%r0" - GR_NUM r3, \gr - .word 0xE700 | ((v1&15) << 4) | r3 - .word (b2 << 12) | (\disp) - MRXBOPC \m, 0x22, v1 -.endm -.macro VLVGB v, gr, index, base - VLVG \v, \gr, \index, \base, 0 -.endm -.macro VLVGH v, gr, index - VLVG \v, \gr, \index, 1 -.endm -.macro VLVGF v, gr, index - VLVG \v, \gr, \index, 2 -.endm -.macro VLVGG v, gr, index - VLVG \v, \gr, \index, 3 -.endm - -/* VECTOR LOAD REGISTER */ -.macro VLR v1, v2 - VX_NUM v1, \v1 - VX_NUM v2, \v2 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word 0 - MRXBOPC 0, 0x56, v1, v2 -.endm - -/* VECTOR LOAD */ -.macro VL v, disp, index="%r0", base - VX_NUM v1, \v - GR_NUM x2, \index - GR_NUM b2, \base - .word 0xE700 | ((v1&15) << 4) | x2 - .word (b2 << 12) | (\disp) - MRXBOPC 0, 0x06, v1 -.endm - -/* VECTOR LOAD ELEMENT */ -.macro VLEx vr1, disp, index="%r0", base, m3, opc - VX_NUM v1, \vr1 - GR_NUM x2, \index - GR_NUM b2, \base - .word 0xE700 | ((v1&15) << 4) | x2 - .word (b2 << 12) | (\disp) - MRXBOPC \m3, \opc, v1 -.endm -.macro VLEB vr1, disp, index="%r0", base, m3 - VLEx \vr1, \disp, \index, \base, \m3, 0x00 -.endm -.macro VLEH vr1, disp, index="%r0", base, m3 - VLEx \vr1, \disp, \index, \base, \m3, 0x01 -.endm -.macro VLEF vr1, disp, index="%r0", base, m3 - VLEx \vr1, \disp, \index, \base, \m3, 0x03 -.endm -.macro VLEG vr1, disp, index="%r0", base, m3 - VLEx \vr1, \disp, \index, \base, \m3, 0x02 -.endm - -/* VECTOR LOAD ELEMENT IMMEDIATE */ -.macro VLEIx vr1, imm2, m3, opc - VX_NUM v1, \vr1 - .word 0xE700 | ((v1&15) << 4) - .word \imm2 - MRXBOPC \m3, \opc, v1 -.endm -.macro VLEIB vr1, imm2, index - VLEIx \vr1, \imm2, \index, 0x40 -.endm -.macro VLEIH vr1, imm2, index - VLEIx \vr1, \imm2, \index, 0x41 -.endm -.macro VLEIF vr1, imm2, index - VLEIx \vr1, \imm2, \index, 0x43 -.endm -.macro VLEIG vr1, imm2, index - VLEIx \vr1, \imm2, \index, 0x42 -.endm - -/* VECTOR LOAD GR FROM VR ELEMENT */ -.macro VLGV gr, vr, disp, base="%r0", m - GR_NUM r1, \gr - GR_NUM b2, \base - VX_NUM v3, \vr - .word 0xE700 | (r1 << 4) | (v3&15) - .word (b2 << 12) | (\disp) - MRXBOPC \m, 0x21, v3 -.endm -.macro VLGVB gr, vr, disp, base="%r0" - VLGV \gr, \vr, \disp, \base, 0 -.endm -.macro VLGVH gr, vr, disp, base="%r0" - VLGV \gr, \vr, \disp, \base, 1 -.endm -.macro VLGVF gr, vr, disp, base="%r0" - VLGV \gr, \vr, \disp, \base, 2 -.endm -.macro VLGVG gr, vr, disp, base="%r0" - VLGV \gr, \vr, \disp, \base, 3 -.endm - -/* VECTOR LOAD MULTIPLE */ -.macro VLM vfrom, vto, disp, base, hint=3 - VX_NUM v1, \vfrom - VX_NUM v3, \vto - GR_NUM b2, \base - .word 0xE700 | ((v1&15) << 4) | (v3&15) - .word (b2 << 12) | (\disp) - MRXBOPC \hint, 0x36, v1, v3 -.endm - -/* VECTOR STORE */ -.macro VST vr1, disp, index="%r0", base - VX_NUM v1, \vr1 - GR_NUM x2, \index - GR_NUM b2, \base - .word 0xE700 | ((v1&15) << 4) | (x2&15) - .word (b2 << 12) | (\disp) - MRXBOPC 0, 0x0E, v1 -.endm - -/* VECTOR STORE MULTIPLE */ -.macro VSTM vfrom, vto, disp, base, hint=3 - VX_NUM v1, \vfrom - VX_NUM v3, \vto - GR_NUM b2, \base - .word 0xE700 | ((v1&15) << 4) | (v3&15) - .word (b2 << 12) | (\disp) - MRXBOPC \hint, 0x3E, v1, v3 -.endm - -/* VECTOR PERMUTE */ -.macro VPERM vr1, vr2, vr3, vr4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - VX_NUM v4, \vr4 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC (v4&15), 0x8C, v1, v2, v3, v4 -.endm - -/* VECTOR UNPACK LOGICAL LOW */ -.macro VUPLL vr1, vr2, m3 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word 0x0000 - MRXBOPC \m3, 0xD4, v1, v2 -.endm -.macro VUPLLB vr1, vr2 - VUPLL \vr1, \vr2, 0 -.endm -.macro VUPLLH vr1, vr2 - VUPLL \vr1, \vr2, 1 -.endm -.macro VUPLLF vr1, vr2 - VUPLL \vr1, \vr2, 2 -.endm - -/* VECTOR PERMUTE DOUBLEWORD IMMEDIATE */ -.macro VPDI vr1, vr2, vr3, m4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC \m4, 0x84, v1, v2, v3 -.endm - -/* VECTOR REPLICATE */ -.macro VREP vr1, vr3, imm2, m4 - VX_NUM v1, \vr1 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v3&15) - .word \imm2 - MRXBOPC \m4, 0x4D, v1, v3 -.endm -.macro VREPB vr1, vr3, imm2 - VREP \vr1, \vr3, \imm2, 0 -.endm -.macro VREPH vr1, vr3, imm2 - VREP \vr1, \vr3, \imm2, 1 -.endm -.macro VREPF vr1, vr3, imm2 - VREP \vr1, \vr3, \imm2, 2 -.endm -.macro VREPG vr1, vr3, imm2 - VREP \vr1, \vr3, \imm2, 3 -.endm - -/* VECTOR MERGE HIGH */ -.macro VMRH vr1, vr2, vr3, m4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC \m4, 0x61, v1, v2, v3 -.endm -.macro VMRHB vr1, vr2, vr3 - VMRH \vr1, \vr2, \vr3, 0 -.endm -.macro VMRHH vr1, vr2, vr3 - VMRH \vr1, \vr2, \vr3, 1 -.endm -.macro VMRHF vr1, vr2, vr3 - VMRH \vr1, \vr2, \vr3, 2 -.endm -.macro VMRHG vr1, vr2, vr3 - VMRH \vr1, \vr2, \vr3, 3 -.endm - -/* VECTOR MERGE LOW */ -.macro VMRL vr1, vr2, vr3, m4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC \m4, 0x60, v1, v2, v3 -.endm -.macro VMRLB vr1, vr2, vr3 - VMRL \vr1, \vr2, \vr3, 0 -.endm -.macro VMRLH vr1, vr2, vr3 - VMRL \vr1, \vr2, \vr3, 1 -.endm -.macro VMRLF vr1, vr2, vr3 - VMRL \vr1, \vr2, \vr3, 2 -.endm -.macro VMRLG vr1, vr2, vr3 - VMRL \vr1, \vr2, \vr3, 3 -.endm - - -/* Vector integer instructions */ - -/* VECTOR AND */ -.macro VN vr1, vr2, vr3 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC 0, 0x68, v1, v2, v3 -.endm - -/* VECTOR EXCLUSIVE OR */ -.macro VX vr1, vr2, vr3 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC 0, 0x6D, v1, v2, v3 -.endm - -/* VECTOR GALOIS FIELD MULTIPLY SUM */ -.macro VGFM vr1, vr2, vr3, m4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC \m4, 0xB4, v1, v2, v3 -.endm -.macro VGFMB vr1, vr2, vr3 - VGFM \vr1, \vr2, \vr3, 0 -.endm -.macro VGFMH vr1, vr2, vr3 - VGFM \vr1, \vr2, \vr3, 1 -.endm -.macro VGFMF vr1, vr2, vr3 - VGFM \vr1, \vr2, \vr3, 2 -.endm -.macro VGFMG vr1, vr2, vr3 - VGFM \vr1, \vr2, \vr3, 3 -.endm - -/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */ -.macro VGFMA vr1, vr2, vr3, vr4, m5 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - VX_NUM v4, \vr4 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) | (\m5 << 8) - MRXBOPC (v4&15), 0xBC, v1, v2, v3, v4 -.endm -.macro VGFMAB vr1, vr2, vr3, vr4 - VGFMA \vr1, \vr2, \vr3, \vr4, 0 -.endm -.macro VGFMAH vr1, vr2, vr3, vr4 - VGFMA \vr1, \vr2, \vr3, \vr4, 1 -.endm -.macro VGFMAF vr1, vr2, vr3, vr4 - VGFMA \vr1, \vr2, \vr3, \vr4, 2 -.endm -.macro VGFMAG vr1, vr2, vr3, vr4 - VGFMA \vr1, \vr2, \vr3, \vr4, 3 -.endm - -/* VECTOR SHIFT RIGHT LOGICAL BY BYTE */ -.macro VSRLB vr1, vr2, vr3 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC 0, 0x7D, v1, v2, v3 -.endm - -/* VECTOR REPLICATE IMMEDIATE */ -.macro VREPI vr1, imm2, m3 - VX_NUM v1, \vr1 - .word 0xE700 | ((v1&15) << 4) - .word \imm2 - MRXBOPC \m3, 0x45, v1 -.endm -.macro VREPIB vr1, imm2 - VREPI \vr1, \imm2, 0 -.endm -.macro VREPIH vr1, imm2 - VREPI \vr1, \imm2, 1 -.endm -.macro VREPIF vr1, imm2 - VREPI \vr1, \imm2, 2 -.endm -.macro VREPIG vr1, imm2 - VREP \vr1, \imm2, 3 -.endm - -/* VECTOR ADD */ -.macro VA vr1, vr2, vr3, m4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC \m4, 0xF3, v1, v2, v3 -.endm -.macro VAB vr1, vr2, vr3 - VA \vr1, \vr2, \vr3, 0 -.endm -.macro VAH vr1, vr2, vr3 - VA \vr1, \vr2, \vr3, 1 -.endm -.macro VAF vr1, vr2, vr3 - VA \vr1, \vr2, \vr3, 2 -.endm -.macro VAG vr1, vr2, vr3 - VA \vr1, \vr2, \vr3, 3 -.endm -.macro VAQ vr1, vr2, vr3 - VA \vr1, \vr2, \vr3, 4 -.endm - -/* VECTOR ELEMENT SHIFT RIGHT ARITHMETIC */ -.macro VESRAV vr1, vr2, vr3, m4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC \m4, 0x7A, v1, v2, v3 -.endm - -.macro VESRAVB vr1, vr2, vr3 - VESRAV \vr1, \vr2, \vr3, 0 -.endm -.macro VESRAVH vr1, vr2, vr3 - VESRAV \vr1, \vr2, \vr3, 1 -.endm -.macro VESRAVF vr1, vr2, vr3 - VESRAV \vr1, \vr2, \vr3, 2 -.endm -.macro VESRAVG vr1, vr2, vr3 - VESRAV \vr1, \vr2, \vr3, 3 -.endm +#include <asm/vx-insn-asm.h> -/* VECTOR ELEMENT ROTATE LEFT LOGICAL */ -.macro VERLL vr1, vr3, disp, base="%r0", m4 - VX_NUM v1, \vr1 - VX_NUM v3, \vr3 - GR_NUM b2, \base - .word 0xE700 | ((v1&15) << 4) | (v3&15) - .word (b2 << 12) | (\disp) - MRXBOPC \m4, 0x33, v1, v3 -.endm -.macro VERLLB vr1, vr3, disp, base="%r0" - VERLL \vr1, \vr3, \disp, \base, 0 -.endm -.macro VERLLH vr1, vr3, disp, base="%r0" - VERLL \vr1, \vr3, \disp, \base, 1 -.endm -.macro VERLLF vr1, vr3, disp, base="%r0" - VERLL \vr1, \vr3, \disp, \base, 2 -.endm -.macro VERLLG vr1, vr3, disp, base="%r0" - VERLL \vr1, \vr3, \disp, \base, 3 -.endm +#ifndef __ASSEMBLY__ -/* VECTOR SHIFT LEFT DOUBLE BY BYTE */ -.macro VSLDB vr1, vr2, vr3, imm4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) | (\imm4) - MRXBOPC 0, 0x77, v1, v2, v3 -.endm +asm(".include \"asm/vx-insn-asm.h\"\n"); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLY__ */ #endif /* __ASM_S390_VX_INSN_H */ diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h index d1ecd5d722a0..2cd28af50dd4 100644 --- a/arch/s390/include/uapi/asm/ipl.h +++ b/arch/s390/include/uapi/asm/ipl.h @@ -27,6 +27,7 @@ enum ipl_pbt { IPL_PBT_FCP = 0, IPL_PBT_SCP_DATA = 1, IPL_PBT_CCW = 2, + IPL_PBT_ECKD = 3, IPL_PBT_NVME = 4, }; @@ -111,6 +112,34 @@ struct ipl_pb0_ccw { __u8 reserved5[8]; } __packed; +/* IPL Parameter Block 0 for ECKD */ +struct ipl_pb0_eckd { + __u32 len; + __u8 pbt; + __u8 reserved1[3]; + __u32 reserved2[78]; + __u8 opt; + __u8 reserved4[4]; + __u8 reserved5:5; + __u8 ssid:3; + __u16 devno; + __u32 reserved6[5]; + __u32 bootprog; + __u8 reserved7[12]; + struct { + __u16 cyl; + __u8 head; + __u8 record; + __u32 reserved; + } br_chr __packed; + __u32 scp_data_len; + __u8 reserved8[260]; + __u8 scp_data[]; +} __packed; + +#define IPL_PB0_ECKD_OPT_IPL 0x10 +#define IPL_PB0_ECKD_OPT_DUMP 0x20 + #define IPL_PB0_CCW_VM_FLAG_NSS 0x80 #define IPL_PB0_CCW_VM_FLAG_VP 0x40 diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index d8ce965c0a97..3f8e760298c2 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -62,6 +62,7 @@ int main(void) OFFSET(__SF_SIE_SAVEAREA, stack_frame, sie_savearea); OFFSET(__SF_SIE_REASON, stack_frame, sie_reason); OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags); + OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys); DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame)); BLANK(); /* idle data offsets */ diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index dd74fe664ed1..c13b1455ec8c 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -46,7 +46,7 @@ struct save_area { u64 fprs[16]; u32 fpc; u32 prefix; - u64 todpreg; + u32 todpreg; u64 timer; u64 todcmp; u64 vxrs_low[16]; @@ -153,7 +153,7 @@ int copy_oldmem_kernel(void *dst, unsigned long src, size_t count) kvec.iov_base = dst; kvec.iov_len = count; - iov_iter_kvec(&iter, WRITE, &kvec, 1, count); + iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, count); if (copy_oldmem_iter(&iter, src, count) < count) return -EFAULT; return 0; diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index d7a82066a638..b376f0377a2c 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -92,7 +92,7 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, const char *in_buf); static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, debug_sprintf_entry_t *curr_event); + char *out_buf, const char *inbuf); static void debug_areas_swap(debug_info_t *a, debug_info_t *b); static void debug_events_append(debug_info_t *dest, debug_info_t *src); @@ -139,7 +139,7 @@ struct debug_view debug_sprintf_view = { "sprintf", NULL, &debug_dflt_header_fn, - (debug_format_proc_t *)&debug_sprintf_format_fn, + &debug_sprintf_format_fn, NULL, NULL }; @@ -1532,8 +1532,9 @@ EXPORT_SYMBOL(debug_dflt_header_fn); #define DEBUG_SPRINTF_MAX_ARGS 10 static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, debug_sprintf_entry_t *curr_event) + char *out_buf, const char *inbuf) { + debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf; int num_longs, num_used_args = 0, i, rc = 0; int index[DEBUG_SPRINTF_MAX_ARGS]; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index d2a1f2f4f5b8..0f423e9df095 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -122,24 +122,6 @@ _LPP_OFFSET = __LC_LPP "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", 82 .endm - /* - * The CHKSTG macro jumps to the provided label in case the - * machine check interruption code reports one of unrecoverable - * storage errors: - * - Storage error uncorrected - * - Storage key error uncorrected - * - Storage degradation with Failing-storage-address validity - */ - .macro CHKSTG errlabel - TSTMSK __LC_MCCK_CODE,(MCCK_CODE_STG_ERROR|MCCK_CODE_STG_KEY_ERROR) - jnz \errlabel - TSTMSK __LC_MCCK_CODE,MCCK_CODE_STG_DEGRAD - jz .Loklabel\@ - TSTMSK __LC_MCCK_CODE,MCCK_CODE_STG_FAIL_ADDR - jnz \errlabel -.Loklabel\@: - .endm - #if IS_ENABLED(CONFIG_KVM) /* * The OUTSIDE macro jumps to the provided label in case the value @@ -225,18 +207,20 @@ ENDPROC(__switch_to) #if IS_ENABLED(CONFIG_KVM) /* - * sie64a calling convention: - * %r2 pointer to sie control block - * %r3 guest register save area + * __sie64a calling convention: + * %r2 pointer to sie control block phys + * %r3 pointer to sie control block virt + * %r4 guest register save area */ -ENTRY(sie64a) +ENTRY(__sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers lg %r12,__LC_CURRENT - stg %r2,__SF_SIE_CONTROL(%r15) # save control block pointer - stg %r3,__SF_SIE_SAVEAREA(%r15) # save guest register save area + stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical.. + stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses + stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0 mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags - lmg %r0,%r13,0(%r3) # load guest gprs 0-13 + lmg %r0,%r13,0(%r4) # load guest gprs 0-13 lg %r14,__LC_GMAP # get gmap pointer ltgr %r14,%r14 jz .Lsie_gmap @@ -248,6 +232,7 @@ ENTRY(sie64a) jnz .Lsie_skip TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lsie_skip # exit if fp/vx regs changed + lg %r14,__SF_SIE_CONTROL_PHYS(%r15) # get sie block phys addr BPEXIT __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_entry: sie 0(%r14) @@ -258,13 +243,14 @@ ENTRY(sie64a) BPOFF BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_skip: + lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce .Lsie_done: # some program checks are suppressing. C code (e.g. do_protection_exception) # will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There # are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable. -# Other instructions between sie64a and .Lsie_done should not cause program +# Other instructions between __sie64a and .Lsie_done should not cause program # interrupts. So lets use 3 nops as a landing pad for all possible rewinds. .Lrewind_pad6: nopr 7 @@ -293,8 +279,8 @@ sie_exit: EX_TABLE(.Lrewind_pad4,.Lsie_fault) EX_TABLE(.Lrewind_pad2,.Lsie_fault) EX_TABLE(sie_exit,.Lsie_fault) -ENDPROC(sie64a) -EXPORT_SYMBOL(sie64a) +ENDPROC(__sie64a) +EXPORT_SYMBOL(__sie64a) EXPORT_SYMBOL(sie_exit) #endif @@ -373,7 +359,7 @@ ENTRY(pgm_check_handler) j 3f # -> fault in user space .Lpgm_skip_asce: #if IS_ENABLED(CONFIG_KVM) - # cleanup critical section for program checks in sie64a + # cleanup critical section for program checks in __sie64a OUTSIDE %r9,.Lsie_gmap,.Lsie_done,1f SIEEXIT lghi %r10,_PIF_GUEST_FAULT @@ -546,26 +532,18 @@ ENTRY(mcck_int_handler) 3: TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID jno .Lmcck_panic tmhh %r8,0x0001 # interrupting from user ? - jnz 6f + jnz .Lmcck_user TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID jno .Lmcck_panic #if IS_ENABLED(CONFIG_KVM) - OUTSIDE %r9,.Lsie_gmap,.Lsie_done,6f + OUTSIDE %r9,.Lsie_gmap,.Lsie_done,.Lmcck_stack OUTSIDE %r9,.Lsie_entry,.Lsie_leave,4f oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST - j 5f -4: CHKSTG .Lmcck_panic -5: larl %r14,.Lstosm_tmp - stosm 0(%r14),0x04 # turn dat on, keep irqs off - BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) +4: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) SIEEXIT j .Lmcck_stack #endif -6: CHKSTG .Lmcck_panic - larl %r14,.Lstosm_tmp - stosm 0(%r14),0x04 # turn dat on, keep irqs off - tmhh %r8,0x0001 # interrupting from user ? - jz .Lmcck_stack +.Lmcck_user: BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP .Lmcck_stack: lg %r15,__LC_MCCK_STACK diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index d864c9a325e2..4666b29ac8a1 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -10,8 +10,7 @@ #include <linux/sched.h> #include <asm/fpu/types.h> #include <asm/fpu/api.h> - -asm(".include \"asm/vx-insn.h\"\n"); +#include <asm/vx-insn.h> void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags) { diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 325cbf69ebbd..fbd646dbf440 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -12,6 +12,7 @@ #include <linux/init.h> #include <linux/device.h> #include <linux/delay.h> +#include <linux/kstrtox.h> #include <linux/panic_notifier.h> #include <linux/reboot.h> #include <linux/ctype.h> @@ -39,6 +40,8 @@ #define IPL_UNKNOWN_STR "unknown" #define IPL_CCW_STR "ccw" +#define IPL_ECKD_STR "eckd" +#define IPL_ECKD_DUMP_STR "eckd_dump" #define IPL_FCP_STR "fcp" #define IPL_FCP_DUMP_STR "fcp_dump" #define IPL_NVME_STR "nvme" @@ -46,6 +49,7 @@ #define IPL_NSS_STR "nss" #define DUMP_CCW_STR "ccw" +#define DUMP_ECKD_STR "eckd" #define DUMP_FCP_STR "fcp" #define DUMP_NVME_STR "nvme" #define DUMP_NONE_STR "none" @@ -92,6 +96,10 @@ static char *ipl_type_str(enum ipl_type type) switch (type) { case IPL_TYPE_CCW: return IPL_CCW_STR; + case IPL_TYPE_ECKD: + return IPL_ECKD_STR; + case IPL_TYPE_ECKD_DUMP: + return IPL_ECKD_DUMP_STR; case IPL_TYPE_FCP: return IPL_FCP_STR; case IPL_TYPE_FCP_DUMP: @@ -113,6 +121,7 @@ enum dump_type { DUMP_TYPE_CCW = 2, DUMP_TYPE_FCP = 4, DUMP_TYPE_NVME = 8, + DUMP_TYPE_ECKD = 16, }; static char *dump_type_str(enum dump_type type) @@ -122,6 +131,8 @@ static char *dump_type_str(enum dump_type type) return DUMP_NONE_STR; case DUMP_TYPE_CCW: return DUMP_CCW_STR; + case DUMP_TYPE_ECKD: + return DUMP_ECKD_STR; case DUMP_TYPE_FCP: return DUMP_FCP_STR; case DUMP_TYPE_NVME: @@ -147,6 +158,7 @@ static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN; static struct ipl_parameter_block *reipl_block_fcp; static struct ipl_parameter_block *reipl_block_nvme; static struct ipl_parameter_block *reipl_block_ccw; +static struct ipl_parameter_block *reipl_block_eckd; static struct ipl_parameter_block *reipl_block_nss; static struct ipl_parameter_block *reipl_block_actual; @@ -155,12 +167,14 @@ static enum dump_type dump_type = DUMP_TYPE_NONE; static struct ipl_parameter_block *dump_block_fcp; static struct ipl_parameter_block *dump_block_nvme; static struct ipl_parameter_block *dump_block_ccw; +static struct ipl_parameter_block *dump_block_eckd; static struct sclp_ipl_info sclp_ipl_info; static bool reipl_nvme_clear; static bool reipl_fcp_clear; static bool reipl_ccw_clear; +static bool reipl_eckd_clear; static inline int __diag308(unsigned long subcode, void *addr) { @@ -218,14 +232,14 @@ IPL_ATTR_SHOW_FN(_prefix, _name, "0.%x.%04x\n", \ _ipl_blk.ssid, _ipl_blk.devno); \ IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk); \ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ - __ATTR(_name, (S_IRUGO | S_IWUSR), \ + __ATTR(_name, 0644, \ sys_##_prefix##_##_name##_show, \ sys_##_prefix##_##_name##_store) \ #define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value) \ IPL_ATTR_SHOW_FN(_prefix, _name, _format, _value) \ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ - __ATTR(_name, S_IRUGO, sys_##_prefix##_##_name##_show, NULL) + __ATTR(_name, 0444, sys_##_prefix##_##_name##_show, NULL) #define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value) \ IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, (unsigned long long) _value) \ @@ -240,7 +254,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ return len; \ } \ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ - __ATTR(_name,(S_IRUGO | S_IWUSR), \ + __ATTR(_name, 0644, \ sys_##_prefix##_##_name##_show, \ sys_##_prefix##_##_name##_store) @@ -255,7 +269,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ return len; \ } \ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ - __ATTR(_name,(S_IRUGO | S_IWUSR), \ + __ATTR(_name, 0644, \ sys_##_prefix##_##_name##_show, \ sys_##_prefix##_##_name##_store) @@ -281,6 +295,11 @@ static __init enum ipl_type get_ipl_type(void) return IPL_TYPE_NVME_DUMP; else return IPL_TYPE_NVME; + case IPL_PBT_ECKD: + if (ipl_block.eckd.opt == IPL_PB0_ECKD_OPT_DUMP) + return IPL_TYPE_ECKD_DUMP; + else + return IPL_TYPE_ECKD; } return IPL_TYPE_UNKNOWN; } @@ -325,7 +344,7 @@ static ssize_t ipl_vm_parm_show(struct kobject *kobj, } static struct kobj_attribute sys_ipl_vm_parm_attr = - __ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL); + __ATTR(parm, 0444, ipl_vm_parm_show, NULL); static ssize_t sys_ipl_device_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) @@ -334,6 +353,10 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj, case IPL_TYPE_CCW: return sprintf(page, "0.%x.%04x\n", ipl_block.ccw.ssid, ipl_block.ccw.devno); + case IPL_TYPE_ECKD: + case IPL_TYPE_ECKD_DUMP: + return sprintf(page, "0.%x.%04x\n", ipl_block.eckd.ssid, + ipl_block.eckd.devno); case IPL_TYPE_FCP: case IPL_TYPE_FCP_DUMP: return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno); @@ -346,7 +369,7 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj, } static struct kobj_attribute sys_ipl_device_attr = - __ATTR(device, S_IRUGO, sys_ipl_device_show, NULL); + __ATTR(device, 0444, sys_ipl_device_show, NULL); static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, @@ -356,7 +379,7 @@ static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj, ipl_block.hdr.len); } static struct bin_attribute ipl_parameter_attr = - __BIN_ATTR(binary_parameter, S_IRUGO, ipl_parameter_read, NULL, + __BIN_ATTR(binary_parameter, 0444, ipl_parameter_read, NULL, PAGE_SIZE); static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj, @@ -379,11 +402,24 @@ static ssize_t ipl_nvme_scp_data_read(struct file *filp, struct kobject *kobj, return memory_read_from_buffer(buf, count, &off, scp_data, size); } +static ssize_t ipl_eckd_scp_data_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + unsigned int size = ipl_block.eckd.scp_data_len; + void *scp_data = &ipl_block.eckd.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + static struct bin_attribute ipl_scp_data_attr = - __BIN_ATTR(scp_data, S_IRUGO, ipl_scp_data_read, NULL, PAGE_SIZE); + __BIN_ATTR(scp_data, 0444, ipl_scp_data_read, NULL, PAGE_SIZE); static struct bin_attribute ipl_nvme_scp_data_attr = - __BIN_ATTR(scp_data, S_IRUGO, ipl_nvme_scp_data_read, NULL, PAGE_SIZE); + __BIN_ATTR(scp_data, 0444, ipl_nvme_scp_data_read, NULL, PAGE_SIZE); + +static struct bin_attribute ipl_eckd_scp_data_attr = + __BIN_ATTR(scp_data, 0444, ipl_eckd_scp_data_read, NULL, PAGE_SIZE); static struct bin_attribute *ipl_fcp_bin_attrs[] = { &ipl_parameter_attr, @@ -397,6 +433,12 @@ static struct bin_attribute *ipl_nvme_bin_attrs[] = { NULL, }; +static struct bin_attribute *ipl_eckd_bin_attrs[] = { + &ipl_parameter_attr, + &ipl_eckd_scp_data_attr, + NULL, +}; + /* FCP ipl device attributes */ DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n", @@ -418,6 +460,84 @@ DEFINE_IPL_ATTR_RO(ipl_nvme, bootprog, "%lld\n", DEFINE_IPL_ATTR_RO(ipl_nvme, br_lba, "%lld\n", (unsigned long long)ipl_block.nvme.br_lba); +/* ECKD ipl device attributes */ +DEFINE_IPL_ATTR_RO(ipl_eckd, bootprog, "%lld\n", + (unsigned long long)ipl_block.eckd.bootprog); + +#define IPL_ATTR_BR_CHR_SHOW_FN(_name, _ipb) \ +static ssize_t eckd_##_name##_br_chr_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *buf) \ +{ \ + struct ipl_pb0_eckd *ipb = &(_ipb); \ + \ + if (!ipb->br_chr.cyl && \ + !ipb->br_chr.head && \ + !ipb->br_chr.record) \ + return sprintf(buf, "auto\n"); \ + \ + return sprintf(buf, "0x%x,0x%x,0x%x\n", \ + ipb->br_chr.cyl, \ + ipb->br_chr.head, \ + ipb->br_chr.record); \ +} + +#define IPL_ATTR_BR_CHR_STORE_FN(_name, _ipb) \ +static ssize_t eckd_##_name##_br_chr_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + struct ipl_pb0_eckd *ipb = &(_ipb); \ + unsigned long args[3] = { 0 }; \ + char *p, *p1, *tmp = NULL; \ + int i, rc; \ + \ + if (!strncmp(buf, "auto", 4)) \ + goto out; \ + \ + tmp = kstrdup(buf, GFP_KERNEL); \ + p = tmp; \ + for (i = 0; i < 3; i++) { \ + p1 = strsep(&p, ", "); \ + if (!p1) { \ + rc = -EINVAL; \ + goto err; \ + } \ + rc = kstrtoul(p1, 0, args + i); \ + if (rc) \ + goto err; \ + } \ + \ + rc = -EINVAL; \ + if (i != 3) \ + goto err; \ + \ + if ((args[0] || args[1]) && !args[2]) \ + goto err; \ + \ + if (args[0] > UINT_MAX || args[1] > 255 || args[2] > 255) \ + goto err; \ + \ +out: \ + ipb->br_chr.cyl = args[0]; \ + ipb->br_chr.head = args[1]; \ + ipb->br_chr.record = args[2]; \ + rc = len; \ +err: \ + kfree(tmp); \ + return rc; \ +} + +IPL_ATTR_BR_CHR_SHOW_FN(ipl, ipl_block.eckd); +static struct kobj_attribute sys_ipl_eckd_br_chr_attr = + __ATTR(br_chr, 0644, eckd_ipl_br_chr_show, NULL); + +IPL_ATTR_BR_CHR_SHOW_FN(reipl, reipl_block_eckd->eckd); +IPL_ATTR_BR_CHR_STORE_FN(reipl, reipl_block_eckd->eckd); + +static struct kobj_attribute sys_reipl_eckd_br_chr_attr = + __ATTR(br_chr, 0644, eckd_reipl_br_chr_show, eckd_reipl_br_chr_store); + static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { @@ -469,6 +589,20 @@ static struct attribute_group ipl_nvme_attr_group = { .bin_attrs = ipl_nvme_bin_attrs, }; +static struct attribute *ipl_eckd_attrs[] = { + &sys_ipl_type_attr.attr, + &sys_ipl_eckd_bootprog_attr.attr, + &sys_ipl_eckd_br_chr_attr.attr, + &sys_ipl_device_attr.attr, + &sys_ipl_secure_attr.attr, + &sys_ipl_has_secure_attr.attr, + NULL, +}; + +static struct attribute_group ipl_eckd_attr_group = { + .attrs = ipl_eckd_attrs, + .bin_attrs = ipl_eckd_bin_attrs, +}; /* CCW ipl device attributes */ @@ -541,6 +675,9 @@ static int __init ipl_init(void) rc = sysfs_create_group(&ipl_kset->kobj, &ipl_ccw_attr_group_lpar); break; + case IPL_TYPE_ECKD: + rc = sysfs_create_group(&ipl_kset->kobj, &ipl_eckd_attr_group); + break; case IPL_TYPE_FCP: case IPL_TYPE_FCP_DUMP: rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group); @@ -642,11 +779,11 @@ static ssize_t reipl_ccw_vmparm_store(struct kobject *kobj, } static struct kobj_attribute sys_reipl_nss_vmparm_attr = - __ATTR(parm, S_IRUGO | S_IWUSR, reipl_nss_vmparm_show, - reipl_nss_vmparm_store); + __ATTR(parm, 0644, reipl_nss_vmparm_show, + reipl_nss_vmparm_store); static struct kobj_attribute sys_reipl_ccw_vmparm_attr = - __ATTR(parm, S_IRUGO | S_IWUSR, reipl_ccw_vmparm_show, - reipl_ccw_vmparm_store); + __ATTR(parm, 0644, reipl_ccw_vmparm_show, + reipl_ccw_vmparm_store); /* FCP reipl device attributes */ @@ -686,7 +823,7 @@ static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj, return count; } static struct bin_attribute sys_reipl_fcp_scp_data_attr = - __BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_fcp_scpdata_read, + __BIN_ATTR(scp_data, 0644, reipl_fcp_scpdata_read, reipl_fcp_scpdata_write, DIAG308_SCPDATA_SIZE); static struct bin_attribute *reipl_fcp_bin_attrs[] = { @@ -766,8 +903,8 @@ static ssize_t reipl_fcp_loadparm_store(struct kobject *kobj, } static struct kobj_attribute sys_reipl_fcp_loadparm_attr = - __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_fcp_loadparm_show, - reipl_fcp_loadparm_store); + __ATTR(loadparm, 0644, reipl_fcp_loadparm_show, + reipl_fcp_loadparm_store); static ssize_t reipl_fcp_clear_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) @@ -779,7 +916,7 @@ static ssize_t reipl_fcp_clear_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t len) { - if (strtobool(buf, &reipl_fcp_clear) < 0) + if (kstrtobool(buf, &reipl_fcp_clear) < 0) return -EINVAL; return len; } @@ -840,7 +977,7 @@ static ssize_t reipl_nvme_scpdata_write(struct file *filp, struct kobject *kobj, } static struct bin_attribute sys_reipl_nvme_scp_data_attr = - __BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_nvme_scpdata_read, + __BIN_ATTR(scp_data, 0644, reipl_nvme_scpdata_read, reipl_nvme_scpdata_write, DIAG308_SCPDATA_SIZE); static struct bin_attribute *reipl_nvme_bin_attrs[] = { @@ -872,8 +1009,8 @@ static ssize_t reipl_nvme_loadparm_store(struct kobject *kobj, } static struct kobj_attribute sys_reipl_nvme_loadparm_attr = - __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nvme_loadparm_show, - reipl_nvme_loadparm_store); + __ATTR(loadparm, 0644, reipl_nvme_loadparm_show, + reipl_nvme_loadparm_store); static struct attribute *reipl_nvme_attrs[] = { &sys_reipl_nvme_fid_attr.attr, @@ -899,7 +1036,7 @@ static ssize_t reipl_nvme_clear_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t len) { - if (strtobool(buf, &reipl_nvme_clear) < 0) + if (kstrtobool(buf, &reipl_nvme_clear) < 0) return -EINVAL; return len; } @@ -939,8 +1076,8 @@ static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj, } static struct kobj_attribute sys_reipl_ccw_loadparm_attr = - __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show, - reipl_ccw_loadparm_store); + __ATTR(loadparm, 0644, reipl_ccw_loadparm_show, + reipl_ccw_loadparm_store); static ssize_t reipl_ccw_clear_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) @@ -952,7 +1089,7 @@ static ssize_t reipl_ccw_clear_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t len) { - if (strtobool(buf, &reipl_ccw_clear) < 0) + if (kstrtobool(buf, &reipl_ccw_clear) < 0) return -EINVAL; return len; } @@ -985,6 +1122,85 @@ static struct attribute_group reipl_ccw_attr_group_lpar = { .attrs = reipl_ccw_attrs_lpar, }; +/* ECKD reipl device attributes */ + +static ssize_t reipl_eckd_scpdata_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t size = reipl_block_eckd->eckd.scp_data_len; + void *scp_data = reipl_block_eckd->eckd.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + +static ssize_t reipl_eckd_scpdata_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t scpdata_len = count; + size_t padding; + + if (off) + return -EINVAL; + + memcpy(reipl_block_eckd->eckd.scp_data, buf, count); + if (scpdata_len % 8) { + padding = 8 - (scpdata_len % 8); + memset(reipl_block_eckd->eckd.scp_data + scpdata_len, + 0, padding); + scpdata_len += padding; + } + + reipl_block_eckd->hdr.len = IPL_BP_ECKD_LEN + scpdata_len; + reipl_block_eckd->eckd.len = IPL_BP0_ECKD_LEN + scpdata_len; + reipl_block_eckd->eckd.scp_data_len = scpdata_len; + + return count; +} + +static struct bin_attribute sys_reipl_eckd_scp_data_attr = + __BIN_ATTR(scp_data, 0644, reipl_eckd_scpdata_read, + reipl_eckd_scpdata_write, DIAG308_SCPDATA_SIZE); + +static struct bin_attribute *reipl_eckd_bin_attrs[] = { + &sys_reipl_eckd_scp_data_attr, + NULL, +}; + +DEFINE_IPL_CCW_ATTR_RW(reipl_eckd, device, reipl_block_eckd->eckd); +DEFINE_IPL_ATTR_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n", + reipl_block_eckd->eckd.bootprog); + +static struct attribute *reipl_eckd_attrs[] = { + &sys_reipl_eckd_device_attr.attr, + &sys_reipl_eckd_bootprog_attr.attr, + &sys_reipl_eckd_br_chr_attr.attr, + NULL, +}; + +static struct attribute_group reipl_eckd_attr_group = { + .attrs = reipl_eckd_attrs, + .bin_attrs = reipl_eckd_bin_attrs +}; + +static ssize_t reipl_eckd_clear_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%u\n", reipl_eckd_clear); +} + +static ssize_t reipl_eckd_clear_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + if (strtobool(buf, &reipl_eckd_clear) < 0) + return -EINVAL; + return len; +} + +static struct kobj_attribute sys_reipl_eckd_clear_attr = + __ATTR(clear, 0644, reipl_eckd_clear_show, reipl_eckd_clear_store); /* NSS reipl device attributes */ static void reipl_get_ascii_nss_name(char *dst, @@ -1032,12 +1248,12 @@ static ssize_t reipl_nss_name_store(struct kobject *kobj, } static struct kobj_attribute sys_reipl_nss_name_attr = - __ATTR(name, S_IRUGO | S_IWUSR, reipl_nss_name_show, - reipl_nss_name_store); + __ATTR(name, 0644, reipl_nss_name_show, + reipl_nss_name_store); static struct kobj_attribute sys_reipl_nss_loadparm_attr = - __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nss_loadparm_show, - reipl_nss_loadparm_store); + __ATTR(loadparm, 0644, reipl_nss_loadparm_show, + reipl_nss_loadparm_store); static struct attribute *reipl_nss_attrs[] = { &sys_reipl_nss_name_attr.attr, @@ -1068,6 +1284,9 @@ static int reipl_set_type(enum ipl_type type) case IPL_TYPE_CCW: reipl_block_actual = reipl_block_ccw; break; + case IPL_TYPE_ECKD: + reipl_block_actual = reipl_block_eckd; + break; case IPL_TYPE_FCP: reipl_block_actual = reipl_block_fcp; break; @@ -1098,6 +1317,8 @@ static ssize_t reipl_type_store(struct kobject *kobj, if (strncmp(buf, IPL_CCW_STR, strlen(IPL_CCW_STR)) == 0) rc = reipl_set_type(IPL_TYPE_CCW); + else if (strncmp(buf, IPL_ECKD_STR, strlen(IPL_ECKD_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_ECKD); else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0) rc = reipl_set_type(IPL_TYPE_FCP); else if (strncmp(buf, IPL_NVME_STR, strlen(IPL_NVME_STR)) == 0) @@ -1113,6 +1334,7 @@ static struct kobj_attribute reipl_type_attr = static struct kset *reipl_kset; static struct kset *reipl_fcp_kset; static struct kset *reipl_nvme_kset; +static struct kset *reipl_eckd_kset; static void __reipl_run(void *unused) { @@ -1124,6 +1346,13 @@ static void __reipl_run(void *unused) else diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); break; + case IPL_TYPE_ECKD: + diag308(DIAG308_SET, reipl_block_eckd); + if (reipl_eckd_clear) + diag308(DIAG308_LOAD_CLEAR, NULL); + else + diag308(DIAG308_LOAD_NORMAL, NULL); + break; case IPL_TYPE_FCP: diag308(DIAG308_SET, reipl_block_fcp); if (reipl_fcp_clear) @@ -1147,6 +1376,7 @@ static void __reipl_run(void *unused) break; case IPL_TYPE_FCP_DUMP: case IPL_TYPE_NVME_DUMP: + case IPL_TYPE_ECKD_DUMP: break; } disabled_wait(); @@ -1344,6 +1574,58 @@ out1: return rc; } +static int __init reipl_eckd_init(void) +{ + int rc; + + if (!sclp.has_sipl_eckd) + return 0; + + reipl_block_eckd = (void *)get_zeroed_page(GFP_KERNEL); + if (!reipl_block_eckd) + return -ENOMEM; + + /* sysfs: create kset for mixing attr group and bin attrs */ + reipl_eckd_kset = kset_create_and_add(IPL_ECKD_STR, NULL, + &reipl_kset->kobj); + if (!reipl_eckd_kset) { + free_page((unsigned long)reipl_block_eckd); + return -ENOMEM; + } + + rc = sysfs_create_group(&reipl_eckd_kset->kobj, &reipl_eckd_attr_group); + if (rc) + goto out1; + + if (test_facility(141)) { + rc = sysfs_create_file(&reipl_eckd_kset->kobj, + &sys_reipl_eckd_clear_attr.attr); + if (rc) + goto out2; + } else { + reipl_eckd_clear = true; + } + + if (ipl_info.type == IPL_TYPE_ECKD) { + memcpy(reipl_block_eckd, &ipl_block, sizeof(ipl_block)); + } else { + reipl_block_eckd->hdr.len = IPL_BP_ECKD_LEN; + reipl_block_eckd->hdr.version = IPL_PARM_BLOCK_VERSION; + reipl_block_eckd->eckd.len = IPL_BP0_ECKD_LEN; + reipl_block_eckd->eckd.pbt = IPL_PBT_ECKD; + reipl_block_eckd->eckd.opt = IPL_PB0_ECKD_OPT_IPL; + } + reipl_capabilities |= IPL_TYPE_ECKD; + return 0; + +out2: + sysfs_remove_group(&reipl_eckd_kset->kobj, &reipl_eckd_attr_group); +out1: + kset_unregister(reipl_eckd_kset); + free_page((unsigned long)reipl_block_eckd); + return rc; +} + static int __init reipl_type_init(void) { enum ipl_type reipl_type = ipl_info.type; @@ -1365,6 +1647,9 @@ static int __init reipl_type_init(void) } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_CCW) { memcpy(reipl_block_ccw, reipl_block, size); reipl_type = IPL_TYPE_CCW; + } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_ECKD) { + memcpy(reipl_block_eckd, reipl_block, size); + reipl_type = IPL_TYPE_ECKD; } out: return reipl_set_type(reipl_type); @@ -1385,6 +1670,9 @@ static int __init reipl_init(void) rc = reipl_ccw_init(); if (rc) return rc; + rc = reipl_eckd_init(); + if (rc) + return rc; rc = reipl_fcp_init(); if (rc) return rc; @@ -1457,6 +1745,29 @@ static struct attribute_group dump_nvme_attr_group = { .attrs = dump_nvme_attrs, }; +/* ECKD dump device attributes */ +DEFINE_IPL_CCW_ATTR_RW(dump_eckd, device, dump_block_eckd->eckd); +DEFINE_IPL_ATTR_RW(dump_eckd, bootprog, "%lld\n", "%llx\n", + dump_block_eckd->eckd.bootprog); + +IPL_ATTR_BR_CHR_SHOW_FN(dump, dump_block_eckd->eckd); +IPL_ATTR_BR_CHR_STORE_FN(dump, dump_block_eckd->eckd); + +static struct kobj_attribute sys_dump_eckd_br_chr_attr = + __ATTR(br_chr, 0644, eckd_dump_br_chr_show, eckd_dump_br_chr_store); + +static struct attribute *dump_eckd_attrs[] = { + &sys_dump_eckd_device_attr.attr, + &sys_dump_eckd_bootprog_attr.attr, + &sys_dump_eckd_br_chr_attr.attr, + NULL, +}; + +static struct attribute_group dump_eckd_attr_group = { + .name = IPL_ECKD_STR, + .attrs = dump_eckd_attrs, +}; + /* CCW dump device attributes */ DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ccw); @@ -1496,6 +1807,8 @@ static ssize_t dump_type_store(struct kobject *kobj, rc = dump_set_type(DUMP_TYPE_NONE); else if (strncmp(buf, DUMP_CCW_STR, strlen(DUMP_CCW_STR)) == 0) rc = dump_set_type(DUMP_TYPE_CCW); + else if (strncmp(buf, DUMP_ECKD_STR, strlen(DUMP_ECKD_STR)) == 0) + rc = dump_set_type(DUMP_TYPE_ECKD); else if (strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)) == 0) rc = dump_set_type(DUMP_TYPE_FCP); else if (strncmp(buf, DUMP_NVME_STR, strlen(DUMP_NVME_STR)) == 0) @@ -1524,6 +1837,9 @@ static void __dump_run(void *unused) case DUMP_TYPE_CCW: diag308_dump(dump_block_ccw); break; + case DUMP_TYPE_ECKD: + diag308_dump(dump_block_eckd); + break; case DUMP_TYPE_FCP: diag308_dump(dump_block_fcp); break; @@ -1609,6 +1925,29 @@ static int __init dump_nvme_init(void) return 0; } +static int __init dump_eckd_init(void) +{ + int rc; + + if (!sclp_ipl_info.has_dump || !sclp.has_sipl_eckd) + return 0; /* LDIPL DUMP is not installed */ + dump_block_eckd = (void *)get_zeroed_page(GFP_KERNEL); + if (!dump_block_eckd) + return -ENOMEM; + rc = sysfs_create_group(&dump_kset->kobj, &dump_eckd_attr_group); + if (rc) { + free_page((unsigned long)dump_block_eckd); + return rc; + } + dump_block_eckd->hdr.len = IPL_BP_ECKD_LEN; + dump_block_eckd->hdr.version = IPL_PARM_BLOCK_VERSION; + dump_block_eckd->eckd.len = IPL_BP0_ECKD_LEN; + dump_block_eckd->eckd.pbt = IPL_PBT_ECKD; + dump_block_eckd->eckd.opt = IPL_PB0_ECKD_OPT_DUMP; + dump_capabilities |= DUMP_TYPE_ECKD; + return 0; +} + static int __init dump_init(void) { int rc; @@ -1624,6 +1963,9 @@ static int __init dump_init(void) rc = dump_ccw_init(); if (rc) return rc; + rc = dump_eckd_init(); + if (rc) + return rc; rc = dump_fcp_init(); if (rc) return rc; @@ -2057,6 +2399,11 @@ void __init setup_ipl(void) ipl_info.data.ccw.dev_id.ssid = ipl_block.ccw.ssid; ipl_info.data.ccw.dev_id.devno = ipl_block.ccw.devno; break; + case IPL_TYPE_ECKD: + case IPL_TYPE_ECKD_DUMP: + ipl_info.data.eckd.dev_id.ssid = ipl_block.eckd.ssid; + ipl_info.data.eckd.dev_id.devno = ipl_block.eckd.devno; + break; case IPL_TYPE_FCP: case IPL_TYPE_FCP_DUMP: ipl_info.data.fcp.dev_id.ssid = 0; diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 0032bdbe8e3f..401f9c933ff9 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -24,6 +24,7 @@ #include <asm/set_memory.h> #include <asm/sections.h> #include <asm/dis.h> +#include "kprobes.h" #include "entry.h" DEFINE_PER_CPU(struct kprobe *, current_kprobe); @@ -31,8 +32,6 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); struct kretprobe_blackpoint kretprobe_blacklist[] = { }; -DEFINE_INSN_CACHE_OPS(s390_insn); - static int insn_page_in_use; void *alloc_insn_page(void) diff --git a/arch/s390/kernel/kprobes.h b/arch/s390/kernel/kprobes.h new file mode 100644 index 000000000000..dc3ed5098ee7 --- /dev/null +++ b/arch/s390/kernel/kprobes.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef _ARCH_S390_KPROBES_H +#define _ARCH_S390_KPROBES_H + +#include <linux/kprobes.h> + +DEFINE_INSN_CACHE_OPS(s390_insn); + +#endif diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 31cb9b00a36b..5dbf274719a9 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -19,7 +19,7 @@ #include <linux/time.h> #include <linux/module.h> #include <linux/sched/signal.h> - +#include <linux/kvm_host.h> #include <linux/export.h> #include <asm/lowcore.h> #include <asm/smp.h> @@ -31,8 +31,7 @@ #include <asm/ctl_reg.h> #include <asm/asm-offsets.h> #include <asm/pai.h> - -#include <linux/kvm_host.h> +#include <asm/vx-insn.h> struct mcck_struct { unsigned int kill_task : 1; @@ -43,21 +42,12 @@ struct mcck_struct { }; static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); -static struct kmem_cache *mcesa_cache; -static unsigned long mcesa_origin_lc; static inline int nmi_needs_mcesa(void) { return MACHINE_HAS_VX || MACHINE_HAS_GS; } -static inline unsigned long nmi_get_mcesa_size(void) -{ - if (MACHINE_HAS_GS) - return MCESA_MAX_SIZE; - return MCESA_MIN_SIZE; -} - /* * The initial machine check extended save area for the boot CPU. * It will be replaced on the boot CPU reinit with an allocated @@ -75,36 +65,23 @@ void __init nmi_alloc_mcesa_early(u64 *mcesad) *mcesad |= ilog2(MCESA_MAX_SIZE); } -static void __init nmi_alloc_cache(void) +int nmi_alloc_mcesa(u64 *mcesad) { unsigned long size; - - if (!nmi_needs_mcesa()) - return; - size = nmi_get_mcesa_size(); - if (size > MCESA_MIN_SIZE) - mcesa_origin_lc = ilog2(size); - /* create slab cache for the machine-check-extended-save-areas */ - mcesa_cache = kmem_cache_create("nmi_save_areas", size, size, 0, NULL); - if (!mcesa_cache) - panic("Couldn't create nmi save area cache"); -} - -int __ref nmi_alloc_mcesa(u64 *mcesad) -{ - unsigned long origin; + void *origin; *mcesad = 0; if (!nmi_needs_mcesa()) return 0; - if (!mcesa_cache) - nmi_alloc_cache(); - origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL); + size = MACHINE_HAS_GS ? MCESA_MAX_SIZE : MCESA_MIN_SIZE; + origin = kmalloc(size, GFP_KERNEL); if (!origin) return -ENOMEM; /* The pointer is stored with mcesa_bits ORed in */ - kmemleak_not_leak((void *) origin); - *mcesad = __pa(origin) | mcesa_origin_lc; + kmemleak_not_leak(origin); + *mcesad = __pa(origin); + if (MACHINE_HAS_GS) + *mcesad |= ilog2(MCESA_MAX_SIZE); return 0; } @@ -112,12 +89,64 @@ void nmi_free_mcesa(u64 *mcesad) { if (!nmi_needs_mcesa()) return; - kmem_cache_free(mcesa_cache, __va(*mcesad & MCESA_ORIGIN_MASK)); + kfree(__va(*mcesad & MCESA_ORIGIN_MASK)); +} + +static __always_inline char *nmi_puts(char *dest, const char *src) +{ + while (*src) + *dest++ = *src++; + *dest = 0; + return dest; +} + +static __always_inline char *u64_to_hex(char *dest, u64 val) +{ + int i, num; + + for (i = 1; i <= 16; i++) { + num = (val >> (64 - 4 * i)) & 0xf; + if (num >= 10) + *dest++ = 'A' + num - 10; + else + *dest++ = '0' + num; + } + *dest = 0; + return dest; } static notrace void s390_handle_damage(void) { + union ctlreg0 cr0, cr0_new; + char message[100]; + psw_t psw_save; + char *ptr; + smp_emergency_stop(); + diag_amode31_ops.diag308_reset(); + ptr = nmi_puts(message, "System stopped due to unrecoverable machine check, code: 0x"); + u64_to_hex(ptr, S390_lowcore.mcck_interruption_code); + + /* + * Disable low address protection and make machine check new PSW a + * disabled wait PSW. Any additional machine check cannot be handled. + */ + __ctl_store(cr0.val, 0, 0); + cr0_new = cr0; + cr0_new.lap = 0; + __ctl_load(cr0_new.val, 0, 0); + psw_save = S390_lowcore.mcck_new_psw; + psw_bits(S390_lowcore.mcck_new_psw).io = 0; + psw_bits(S390_lowcore.mcck_new_psw).ext = 0; + psw_bits(S390_lowcore.mcck_new_psw).wait = 1; + sclp_emergency_printk(message); + + /* + * Restore machine check new PSW and control register 0 to original + * values. This makes possible system dump analysis easier. + */ + S390_lowcore.mcck_new_psw = psw_save; + __ctl_load(cr0.val, 0, 0); disabled_wait(); while (1); } @@ -181,10 +210,10 @@ void noinstr s390_handle_mcck(struct pt_regs *regs) trace_hardirqs_on(); } /* - * returns 0 if all required registers are available + * returns 0 if register contents could be validated * returns 1 otherwise */ -static int notrace s390_validate_registers(union mci mci, int umode) +static int notrace s390_validate_registers(union mci mci) { struct mcesa *mcesa; void *fpt_save_area; @@ -195,45 +224,15 @@ static int notrace s390_validate_registers(union mci mci, int umode) kill_task = 0; zero = 0; - if (!mci.gr) { - /* - * General purpose registers couldn't be restored and have - * unknown contents. Stop system or terminate process. - */ - if (!umode) - s390_handle_damage(); + if (!mci.gr || !mci.fp) kill_task = 1; - } - if (!mci.fp) { - /* - * Floating point registers can't be restored. If the - * kernel currently uses floating point registers the - * system is stopped. If the process has its floating - * pointer registers loaded it is terminated. - */ - if (S390_lowcore.fpu_flags & KERNEL_VXR_V0V7) - s390_handle_damage(); - if (!test_cpu_flag(CIF_FPU)) - kill_task = 1; - } fpt_save_area = &S390_lowcore.floating_pt_save_area; if (!mci.fc) { - /* - * Floating point control register can't be restored. - * If the kernel currently uses the floating pointer - * registers and needs the FPC register the system is - * stopped. If the process has its floating pointer - * registers loaded it is terminated. Otherwise the - * FPC is just validated. - */ - if (S390_lowcore.fpu_flags & KERNEL_FPC) - s390_handle_damage(); + kill_task = 1; asm volatile( " lfpc %0\n" : : "Q" (zero)); - if (!test_cpu_flag(CIF_FPU)) - kill_task = 1; } else { asm volatile( " lfpc %0\n" @@ -275,26 +274,15 @@ static int notrace s390_validate_registers(union mci mci, int umode) * appropriate actions. The host vector or FPU values have been * saved by KVM and will be restored by KVM. */ - if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST)) { - /* - * Vector registers can't be restored. If the kernel - * currently uses vector registers the system is - * stopped. If the process has its vector registers - * loaded it is terminated. Otherwise just validate - * the registers. - */ - if (S390_lowcore.fpu_flags & KERNEL_VXR) - s390_handle_damage(); - if (!test_cpu_flag(CIF_FPU)) - kill_task = 1; - } + if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST)) + kill_task = 1; cr0.val = S390_lowcore.cregs_save_area[0]; cr0.afp = cr0.vx = 1; __ctl_load(cr0.val, 0, 0); asm volatile( " la 1,%0\n" - " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ - " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ + " VLM 0,15,0,1\n" + " VLM 16,31,256,1\n" : : "Q" (*(struct vx_array *)mcesa->vector_save_area) : "1"); @@ -306,13 +294,8 @@ static int notrace s390_validate_registers(union mci mci, int umode) : : "a" (&S390_lowcore.access_regs_save_area) : "memory"); - if (!mci.ar) { - /* - * Access registers have unknown contents. - * Terminating task. - */ + if (!mci.ar) kill_task = 1; - } /* Validate guarded storage registers */ cr2.val = S390_lowcore.cregs_save_area[2]; if (cr2.gse) { @@ -451,7 +434,9 @@ int notrace s390_do_machine_check(struct pt_regs *regs) s390_handle_damage(); } } - if (s390_validate_registers(mci, user_mode(regs))) { + if (s390_validate_registers(mci)) { + if (!user_mode(regs)) + s390_handle_damage(); /* * Couldn't restore all register contents for the * user space process -> mark task for termination. @@ -480,7 +465,21 @@ int notrace s390_do_machine_check(struct pt_regs *regs) mcck->stp_queue |= stp_island_check(); mcck_pending = 1; } - + /* + * Reinject storage related machine checks into the guest if they + * happen when the guest is running. + */ + if (!test_cpu_flag(CIF_MCCK_GUEST)) { + /* Storage error uncorrected */ + if (mci.se) + s390_handle_damage(); + /* Storage key-error uncorrected */ + if (mci.ke) + s390_handle_damage(); + /* Storage degradation */ + if (mci.ds && mci.fa) + s390_handle_damage(); + } if (mci.cp) { /* Channel report word pending */ mcck->channel_report = 1; diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 6826e2a69a21..985e243a2ed8 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -35,9 +35,9 @@ struct pai_userdata { struct paicrypt_map { unsigned long *page; /* Page for CPU to store counters */ struct pai_userdata *save; /* Page to store no-zero counters */ - unsigned int users; /* # of PAI crypto users */ - unsigned int sampler; /* # of PAI crypto samplers */ - unsigned int counter; /* # of PAI crypto counters */ + unsigned int active_events; /* # of PAI crypto users */ + unsigned int refcnt; /* Reference count mapped buffers */ + enum paievt_mode mode; /* Type of event */ struct perf_event *event; /* Perf event for sampling */ }; @@ -56,15 +56,11 @@ static void paicrypt_event_destroy(struct perf_event *event) cpump->event = NULL; static_branch_dec(&pai_key); mutex_lock(&pai_reserve_mutex); - if (event->attr.sample_period) - cpump->sampler -= 1; - else - cpump->counter -= 1; - debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d" - " sampler %d counter %d\n", __func__, - event->attr.config, event->cpu, cpump->sampler, - cpump->counter); - if (!cpump->counter && !cpump->sampler) { + debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d" + " mode %d refcnt %d\n", __func__, + event->attr.config, event->cpu, + cpump->active_events, cpump->mode, cpump->refcnt); + if (!--cpump->refcnt) { debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n", __func__, (unsigned long)cpump->page, cpump->save); @@ -72,6 +68,7 @@ static void paicrypt_event_destroy(struct perf_event *event) cpump->page = NULL; kvfree(cpump->save); cpump->save = NULL; + cpump->mode = PAI_MODE_NONE; } mutex_unlock(&pai_reserve_mutex); } @@ -136,17 +133,14 @@ static u64 paicrypt_getall(struct perf_event *event) */ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump) { - unsigned int *use_ptr; int rc = 0; mutex_lock(&pai_reserve_mutex); if (a->sample_period) { /* Sampling requested */ - use_ptr = &cpump->sampler; - if (cpump->counter || cpump->sampler) + if (cpump->mode != PAI_MODE_NONE) rc = -EBUSY; /* ... sampling/counting active */ } else { /* Counting requested */ - use_ptr = &cpump->counter; - if (cpump->sampler) + if (cpump->mode == PAI_MODE_SAMPLING) rc = -EBUSY; /* ... and sampling active */ } if (rc) @@ -172,12 +166,16 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump) rc = 0; unlock: - /* If rc is non-zero, do not increment counter/sampler. */ - if (!rc) - *use_ptr += 1; - debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx sampler %d" - " counter %d page %#lx save %p rc %d\n", __func__, - a->sample_period, cpump->sampler, cpump->counter, + /* If rc is non-zero, do not set mode and reference count */ + if (!rc) { + cpump->refcnt++; + cpump->mode = a->sample_period ? PAI_MODE_SAMPLING + : PAI_MODE_COUNTING; + } + debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx users %d" + " mode %d refcnt %d page %#lx save %p rc %d\n", + __func__, a->sample_period, cpump->active_events, + cpump->mode, cpump->refcnt, (unsigned long)cpump->page, cpump->save, rc); mutex_unlock(&pai_reserve_mutex); return rc; @@ -262,7 +260,7 @@ static int paicrypt_add(struct perf_event *event, int flags) struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map); unsigned long ccd; - if (cpump->users++ == 0) { + if (++cpump->active_events == 1) { ccd = virt_to_phys(cpump->page) | PAI_CRYPTO_KERNEL_OFFSET; WRITE_ONCE(S390_lowcore.ccd, ccd); __ctl_set_bit(0, 50); @@ -293,7 +291,7 @@ static void paicrypt_del(struct perf_event *event, int flags) if (!event->attr.sample_period) /* Only counting needs to read counter */ paicrypt_stop(event, PERF_EF_UPDATE); - if (cpump->users-- == 1) { + if (--cpump->active_events == 0) { __ctl_clear_bit(0, 50); WRITE_ONCE(S390_lowcore.ccd, 0); } @@ -379,7 +377,7 @@ static int paicrypt_push_sample(void) /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event CRYPTO_ALL is allowed. */ -static void paicrypt_sched_task(struct perf_event_context *ctx, bool sched_in) +static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) { /* We started with a clean page on event installation. So read out * results on schedule_out and if page was dirty, clear values. diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 74b53c531e0c..1138f57baae3 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -28,12 +28,6 @@ static debug_info_t *paiext_dbg; static unsigned int paiext_cnt; /* Extracted with QPACI instruction */ -enum paiext_mode { - PAI_MODE_NONE, - PAI_MODE_SAMPLING, - PAI_MODE_COUNTER, -}; - struct pai_userdata { u16 num; u64 value; @@ -54,7 +48,7 @@ struct paiext_cb { /* PAI extension 1 control block */ struct paiext_map { unsigned long *area; /* Area for CPU to store counters */ struct pai_userdata *save; /* Area to store non-zero counters */ - enum paiext_mode mode; /* Type of event */ + enum paievt_mode mode; /* Type of event */ unsigned int active_events; /* # of PAI Extension users */ unsigned int refcnt; struct perf_event *event; /* Perf event for sampling */ @@ -192,14 +186,14 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) goto unlock; } cpump->mode = a->sample_period ? PAI_MODE_SAMPLING - : PAI_MODE_COUNTER; + : PAI_MODE_COUNTING; } else { /* Multiple invocation, check whats active. * Supported are multiple counter events or only one sampling * event concurrently at any one time. */ if (cpump->mode == PAI_MODE_SAMPLING || - (cpump->mode == PAI_MODE_COUNTER && a->sample_period)) { + (cpump->mode == PAI_MODE_COUNTING && a->sample_period)) { rc = -EBUSY; goto unlock; } @@ -472,7 +466,7 @@ static int paiext_push_sample(void) /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event NNPA_ALL is allowed. */ -static void paiext_sched_task(struct perf_event_context *ctx, bool sched_in) +static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) { /* We started with a clean page on event installation. So read out * results on schedule_out and if page was dirty, clear values. diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 42af4b3aa02b..3f5d2db0b854 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -224,7 +224,7 @@ unsigned long __get_wchan(struct task_struct *p) unsigned long arch_align_stack(unsigned long sp) { if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= prandom_u32_max(PAGE_SIZE); + sp -= get_random_u32_below(PAGE_SIZE); return sp & ~0xf; } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index ab19ddb09d65..2b6091349daa 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -52,6 +52,7 @@ #include <linux/hugetlb.h> #include <linux/kmemleak.h> +#include <asm/archrandom.h> #include <asm/boot_data.h> #include <asm/ipl.h> #include <asm/facility.h> @@ -437,7 +438,7 @@ static void __init setup_lowcore_dat_off(void) lc->svc_new_psw.addr = (unsigned long) system_call; lc->program_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK; lc->program_new_psw.addr = (unsigned long) pgm_check_handler; - lc->mcck_new_psw.mask = PSW_KERNEL_BITS; + lc->mcck_new_psw.mask = int_psw_mask; lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler; lc->io_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK; lc->io_new_psw.addr = (unsigned long) io_int_handler; @@ -512,6 +513,7 @@ static void __init setup_lowcore_dat_on(void) S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT; S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT; S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT; + S390_lowcore.mcck_new_psw.mask |= PSW_MASK_DAT; S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT; __ctl_set_bit(0, 28); __ctl_store(S390_lowcore.cregs_save_area, 0, 15); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index f9810d2a267c..9f18a4af9c13 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -255,6 +255,13 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr, */ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) { + /* + * The misc feature indicates, among other things, that importing a + * shared page from a different protected VM will automatically also + * transfer its ownership. + */ + if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications)) + return false; if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) return false; return atomic_read(&mm->context.protected_count) > 1; diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 3105ca5bd470..ff7bf4432229 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -44,21 +44,6 @@ struct vdso_data *arch_get_vdso_data(void *vvar_page) return (struct vdso_data *)(vvar_page); } -static struct page *find_timens_vvar_page(struct vm_area_struct *vma) -{ - if (likely(vma->vm_mm == current->mm)) - return current->nsproxy->time_ns->vvar_page; - /* - * VM_PFNMAP | VM_IO protect .fault() handler from being called - * through interfaces like /proc/$pid/mem or - * process_vm_{readv,writev}() as long as there's no .access() - * in special_mapping_vmops(). - * For more details check_vma_flags() and __access_remote_vm() - */ - WARN(1, "vvar_page accessed remotely"); - return NULL; -} - /* * The VVAR page layout depends on whether a task belongs to the root or * non-root time namespace. Whenever a task changes its namespace, the VVAR @@ -84,11 +69,6 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) mmap_read_unlock(mm); return 0; } -#else -static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma) -{ - return NULL; -} #endif static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, @@ -227,7 +207,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned long len) end -= len; if (end > start) { - offset = prandom_u32_max(((end - start) >> PAGE_SHIFT) + 1); + offset = get_random_u32_below(((end - start) >> PAGE_SHIFT) + 1); addr = start + (offset << PAGE_SHIFT); } else { addr = start; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 88112065d941..0ee02dae14b2 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -217,7 +217,7 @@ static int handle_itdb(struct kvm_vcpu *vcpu) return 0; if (current->thread.per_flags & PER_FLAG_NO_TE) return 0; - itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba; + itdb = phys_to_virt(vcpu->arch.sie_block->itdba); rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb)); if (rc) return rc; @@ -409,8 +409,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu) out: if (!cc) { if (kvm_s390_pv_cpu_is_protected(vcpu)) { - memcpy((void *)(sida_origin(vcpu->arch.sie_block)), - sctns, PAGE_SIZE); + memcpy(sida_addr(vcpu->arch.sie_block), sctns, PAGE_SIZE); } else { r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE); if (r) { @@ -464,7 +463,7 @@ static int handle_operexc(struct kvm_vcpu *vcpu) static int handle_pv_spx(struct kvm_vcpu *vcpu) { - u32 pref = *(u32 *)vcpu->arch.sie_block->sidad; + u32 pref = *(u32 *)sida_addr(vcpu->arch.sie_block); kvm_s390_set_prefix(vcpu, pref); trace_kvm_s390_handle_prefix(vcpu, 1, pref); @@ -497,7 +496,7 @@ static int handle_pv_sclp(struct kvm_vcpu *vcpu) static int handle_pv_uvc(struct kvm_vcpu *vcpu) { - struct uv_cb_share *guest_uvcb = (void *)vcpu->arch.sie_block->sidad; + struct uv_cb_share *guest_uvcb = sida_addr(vcpu->arch.sie_block); struct uv_cb_cts uvcb = { .header.cmd = UVC_CMD_UNPIN_PAGE_SHARED, .header.len = sizeof(uvcb), diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index ab569faf0df2..1dae78deddf2 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -314,11 +314,6 @@ static inline u8 gisa_get_ipm(struct kvm_s390_gisa *gisa) return READ_ONCE(gisa->ipm); } -static inline void gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) -{ - clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); -} - static inline int gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) { return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h deleted file mode 100644 index 484608c71dd0..000000000000 --- a/arch/s390/kvm/irq.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * s390 irqchip routines - * - * Copyright IBM Corp. 2014 - * - * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> - */ -#ifndef __KVM_IRQ_H -#define __KVM_IRQ_H - -#include <linux/kvm_host.h> - -static inline int irqchip_in_kernel(struct kvm *kvm) -{ - return 1; -} - -#endif diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 45d4b8182b07..e4890e04b210 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -210,6 +210,14 @@ module_param(diag9c_forwarding_hz, uint, 0644); MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off"); /* + * allow asynchronous deinit for protected guests; enable by default since + * the feature is opt-in anyway + */ +static int async_destroy = 1; +module_param(async_destroy, int, 0444); +MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests"); + +/* * For now we handle at most 16 double words as this is what the s390 base * kernel handles and stores in the prefix page. If we ever need to go beyond * this, this requires changes to code, but the external uapi can stay. @@ -616,6 +624,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_BPB: r = test_facility(82); break; + case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE: + r = async_destroy && is_prot_virt_host(); + break; case KVM_CAP_S390_PROTECTED: r = is_prot_virt_host(); break; @@ -1207,6 +1218,8 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm, return 0; } +static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); + static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) { struct kvm_s390_vm_tod_clock gtod; @@ -1216,7 +1229,7 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx) return -EINVAL; - kvm_s390_set_tod_clock(kvm, >od); + __kvm_s390_set_tod_clock(kvm, >od); VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", gtod.epoch_idx, gtod.tod); @@ -1247,7 +1260,7 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) sizeof(gtod.tod))) return -EFAULT; - kvm_s390_set_tod_clock(kvm, >od); + __kvm_s390_set_tod_clock(kvm, >od); VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod); return 0; } @@ -1259,6 +1272,16 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) if (attr->flags) return -EINVAL; + mutex_lock(&kvm->lock); + /* + * For protected guests, the TOD is managed by the ultravisor, so trying + * to change it will never bring the expected results. + */ + if (kvm_s390_pv_is_protected(kvm)) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + switch (attr->attr) { case KVM_S390_VM_TOD_EXT: ret = kvm_s390_set_tod_ext(kvm, attr); @@ -1273,6 +1296,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) ret = -ENXIO; break; } + +out_unlock: + mutex_unlock(&kvm->lock); return ret; } @@ -2504,9 +2530,13 @@ static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd, static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) { + const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM); + void __user *argp = (void __user *)cmd->data; int r = 0; u16 dummy; - void __user *argp = (void __user *)cmd->data; + + if (need_lock) + mutex_lock(&kvm->lock); switch (cmd->cmd) { case KVM_PV_ENABLE: { @@ -2540,6 +2570,31 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); break; } + case KVM_PV_ASYNC_CLEANUP_PREPARE: + r = -EINVAL; + if (!kvm_s390_pv_is_protected(kvm) || !async_destroy) + break; + + r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); + /* + * If a CPU could not be destroyed, destroy VM will also fail. + * There is no point in trying to destroy it. Instead return + * the rc and rrc from the first CPU that failed destroying. + */ + if (r) + break; + r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc); + + /* no need to block service interrupts any more */ + clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); + break; + case KVM_PV_ASYNC_CLEANUP_PERFORM: + r = -EINVAL; + if (!async_destroy) + break; + /* kvm->lock must not be held; this is asserted inside the function. */ + r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc); + break; case KVM_PV_DISABLE: { r = -EINVAL; if (!kvm_s390_pv_is_protected(kvm)) @@ -2553,7 +2608,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) */ if (r) break; - r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc); + r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc); /* no need to block service interrupts any more */ clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); @@ -2703,6 +2758,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) default: r = -ENOTTY; } + if (need_lock) + mutex_unlock(&kvm->lock); + return r; } @@ -2907,9 +2965,8 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EINVAL; break; } - mutex_lock(&kvm->lock); + /* must be called without kvm->lock */ r = kvm_s390_handle_pv(kvm, &args); - mutex_unlock(&kvm->lock); if (copy_to_user(argp, &args, sizeof(args))) { r = -EFAULT; break; @@ -3228,6 +3285,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_s390_vsie_init(kvm); if (use_gisa) kvm_s390_gisa_init(kvm); + INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup); + kvm->arch.pv.set_aside = NULL; KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); return 0; @@ -3272,11 +3331,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm) /* * We are already at the end of life and kvm->lock is not taken. * This is ok as the file descriptor is closed by now and nobody - * can mess with the pv state. To avoid lockdep_assert_held from - * complaining we do not use kvm_s390_pv_is_protected. + * can mess with the pv state. */ - if (kvm_s390_pv_get_handle(kvm)) - kvm_s390_pv_deinit_vm(kvm, &rc, &rrc); + kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc); /* * Remove the mmu notifier only when the whole KVM VM is torn down, * and only if one was registered to begin with. If the VM is @@ -3329,28 +3386,30 @@ static void sca_del_vcpu(struct kvm_vcpu *vcpu) static void sca_add_vcpu(struct kvm_vcpu *vcpu) { if (!kvm_s390_use_sca_entries()) { - struct bsca_block *sca = vcpu->kvm->arch.sca; + phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca); /* we still need the basic sca for the ipte control */ - vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; + vcpu->arch.sie_block->scaoh = sca_phys >> 32; + vcpu->arch.sie_block->scaol = sca_phys; return; } read_lock(&vcpu->kvm->arch.sca_lock); if (vcpu->kvm->arch.use_esca) { struct esca_block *sca = vcpu->kvm->arch.sca; + phys_addr_t sca_phys = virt_to_phys(sca); - sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; + sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block); + vcpu->arch.sie_block->scaoh = sca_phys >> 32; + vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK; vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); } else { struct bsca_block *sca = vcpu->kvm->arch.sca; + phys_addr_t sca_phys = virt_to_phys(sca); - sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; + sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block); + vcpu->arch.sie_block->scaoh = sca_phys >> 32; + vcpu->arch.sie_block->scaol = sca_phys; set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); } read_unlock(&vcpu->kvm->arch.sca_lock); @@ -3381,6 +3440,7 @@ static int sca_switch_to_extended(struct kvm *kvm) struct kvm_vcpu *vcpu; unsigned long vcpu_idx; u32 scaol, scaoh; + phys_addr_t new_sca_phys; if (kvm->arch.use_esca) return 0; @@ -3389,8 +3449,9 @@ static int sca_switch_to_extended(struct kvm *kvm) if (!new_sca) return -ENOMEM; - scaoh = (u32)((u64)(new_sca) >> 32); - scaol = (u32)(u64)(new_sca) & ~0x3fU; + new_sca_phys = virt_to_phys(new_sca); + scaoh = new_sca_phys >> 32; + scaol = new_sca_phys & ESCA_SCAOL_MASK; kvm_s390_vcpu_block_all(kvm); write_lock(&kvm->arch.sca_lock); @@ -3610,15 +3671,18 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) { - free_page(vcpu->arch.sie_block->cbrlo); + free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo)); vcpu->arch.sie_block->cbrlo = 0; } int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) { - vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT); - if (!vcpu->arch.sie_block->cbrlo) + void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); + + if (!cbrlo_page) return -ENOMEM; + + vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page); return 0; } @@ -3628,7 +3692,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->ibc = model->ibc; if (test_kvm_facility(vcpu->kvm, 7)) - vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; + vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list); } static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) @@ -3685,9 +3749,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); } - vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) - | SDNXC; - vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; + vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC; + vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb); if (sclp.has_kss) kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); @@ -3737,7 +3800,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) return -ENOMEM; vcpu->arch.sie_block = &sie_page->sie_block; - vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; + vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb); /* the real guest size will always be smaller than msl */ vcpu->arch.sie_block->mso = 0; @@ -4377,13 +4440,6 @@ static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_t preempt_enable(); } -void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) -{ - mutex_lock(&kvm->lock); - __kvm_s390_set_tod_clock(kvm, gtod); - mutex_unlock(&kvm->lock); -} - int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) { if (!mutex_trylock(&kvm->lock)) @@ -5161,6 +5217,7 @@ static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu, struct kvm_s390_mem_op *mop) { void __user *uaddr = (void __user *)mop->buf; + void *sida_addr; int r = 0; if (mop->flags || !mop->size) @@ -5172,16 +5229,16 @@ static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu, if (!kvm_s390_pv_cpu_is_protected(vcpu)) return -EINVAL; + sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset; + switch (mop->op) { case KVM_S390_MEMOP_SIDA_READ: - if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) + - mop->sida_offset), mop->size)) + if (copy_to_user(uaddr, sida_addr, mop->size)) r = -EFAULT; break; case KVM_S390_MEMOP_SIDA_WRITE: - if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) + - mop->sida_offset), uaddr, mop->size)) + if (copy_from_user(sida_addr, uaddr, mop->size)) r = -EFAULT; break; } @@ -5559,6 +5616,11 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } +bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) +{ + return true; +} + /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old, diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index f6fd668f887e..d48588c207d8 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -23,7 +23,8 @@ /* Transactional Memory Execution related macros */ #define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE)) #define TDB_FORMAT1 1 -#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1)) +#define IS_ITDB_VALID(vcpu) \ + ((*(char *)phys_to_virt((vcpu)->arch.sie_block->itdba) == TDB_FORMAT1)) extern debug_info_t *kvm_s390_dbf; extern debug_info_t *kvm_s390_dbf_uv; @@ -233,7 +234,7 @@ static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots) static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) { - u32 gd = (u32)(u64)kvm->arch.gisa_int.origin; + u32 gd = virt_to_phys(kvm->arch.gisa_int.origin); if (gd && sclp.has_gisaf) gd |= GISA_FORMAT1; @@ -243,6 +244,9 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) /* implemented in pv.c */ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); +int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc); +int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc); +int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc); int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc); int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc); int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc, @@ -363,7 +367,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index c50c1645c0ae..ec51e810e381 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -126,7 +126,7 @@ int kvm_s390_pci_aen_init(u8 nisc) return -EPERM; mutex_lock(&aift->aift_lock); - aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev), + aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev *), GFP_KERNEL); if (!aift->kzdev) { rc = -ENOMEM; @@ -434,6 +434,7 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev) static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) { struct zpci_dev *zdev = opaque; + u8 status; int rc; if (!zdev) @@ -486,7 +487,7 @@ static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) /* Re-register the IOMMU that was already created */ rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table)); + virt_to_phys(zdev->dma_table), &status); if (rc) goto clear_gisa; @@ -516,6 +517,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque) { struct zpci_dev *zdev = opaque; struct kvm *kvm; + u8 status; if (!zdev) return; @@ -554,7 +556,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque) /* Re-register the IOMMU that was already created */ zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table)); + virt_to_phys(zdev->dma_table), &status); out: spin_lock(&kvm->arch.kzdev_list_lock); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 3335fa09b6f1..9f8a192bd750 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -924,8 +924,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) return -EREMOTE; } if (kvm_s390_pv_cpu_is_protected(vcpu)) { - memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem, - PAGE_SIZE); + memcpy(sida_addr(vcpu->arch.sie_block), (void *)mem, PAGE_SIZE); rc = 0; } else { rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE); diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 7cb7799a0acb..e032ebbf51b9 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -18,6 +18,29 @@ #include <linux/mmu_notifier.h> #include "kvm-s390.h" +/** + * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to + * be destroyed + * + * @list: list head for the list of leftover VMs + * @old_gmap_table: the gmap table of the leftover protected VM + * @handle: the handle of the leftover protected VM + * @stor_var: pointer to the variable storage of the leftover protected VM + * @stor_base: address of the base storage of the leftover protected VM + * + * Represents a protected VM that is still registered with the Ultravisor, + * but which does not correspond any longer to an active KVM VM. It should + * be destroyed at some point later, either asynchronously or when the + * process terminates. + */ +struct pv_vm_to_be_destroyed { + struct list_head list; + unsigned long old_gmap_table; + u64 handle; + void *stor_var; + unsigned long stor_base; +}; + static void kvm_s390_clear_pv_state(struct kvm *kvm) { kvm->arch.pv.handle = 0; @@ -44,7 +67,7 @@ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) free_pages(vcpu->arch.pv.stor_base, get_order(uv_info.guest_cpu_stor_len)); - free_page(sida_origin(vcpu->arch.sie_block)); + free_page((unsigned long)sida_addr(vcpu->arch.sie_block)); vcpu->arch.sie_block->pv_handle_cpu = 0; vcpu->arch.sie_block->pv_handle_config = 0; memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv)); @@ -66,6 +89,7 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) .header.cmd = UVC_CMD_CREATE_SEC_CPU, .header.len = sizeof(uvcb), }; + void *sida_addr; int cc; if (kvm_s390_pv_cpu_get_handle(vcpu)) @@ -79,16 +103,17 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) /* Input */ uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm); uvcb.num = vcpu->arch.sie_block->icpua; - uvcb.state_origin = (u64)vcpu->arch.sie_block; - uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base; + uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block); + uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base); /* Alloc Secure Instruction Data Area Designation */ - vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); - if (!vcpu->arch.sie_block->sidad) { + sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!sida_addr) { free_pages(vcpu->arch.pv.stor_base, get_order(uv_info.guest_cpu_stor_len)); return -ENOMEM; } + vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr); cc = uv_call(0, (u64)&uvcb); *rc = uvcb.header.rc; @@ -159,23 +184,192 @@ out_err: return -ENOMEM; } -/* this should not fail, but if it does, we must not free the donated memory */ -int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc) +/** + * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM. + * @kvm: the KVM that was associated with this leftover protected VM + * @leftover: details about the leftover protected VM that needs a clean up + * @rc: the RC code of the Destroy Secure Configuration UVC + * @rrc: the RRC code of the Destroy Secure Configuration UVC + * + * Destroy one leftover protected VM. + * On success, kvm->mm->context.protected_count will be decremented atomically + * and all other resources used by the VM will be freed. + * + * Return: 0 in case of success, otherwise 1 + */ +static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm, + struct pv_vm_to_be_destroyed *leftover, + u16 *rc, u16 *rrc) { int cc; - cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), - UVC_CMD_DESTROY_SEC_CONF, rc, rrc); + /* It used the destroy-fast UVC, nothing left to do here */ + if (!leftover->handle) + goto done_fast; + cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc); + KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc); + WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc); + if (cc) + return cc; + /* + * Intentionally leak unusable memory. If the UVC fails, the memory + * used for the VM and its metadata is permanently unusable. + * This can only happen in case of a serious KVM or hardware bug; it + * is not expected to happen in normal operation. + */ + free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len)); + free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER); + vfree(leftover->stor_var); +done_fast: + atomic_dec(&kvm->mm->context.protected_count); + return 0; +} + +/** + * kvm_s390_destroy_lower_2g - Destroy the first 2GB of protected guest memory. + * @kvm: the VM whose memory is to be cleared. + * + * Destroy the first 2GB of guest memory, to avoid prefix issues after reboot. + * The CPUs of the protected VM need to be destroyed beforehand. + */ +static void kvm_s390_destroy_lower_2g(struct kvm *kvm) +{ + const unsigned long pages_2g = SZ_2G / PAGE_SIZE; + struct kvm_memory_slot *slot; + unsigned long len; + int srcu_idx; + + srcu_idx = srcu_read_lock(&kvm->srcu); + + /* Take the memslot containing guest absolute address 0 */ + slot = gfn_to_memslot(kvm, 0); + /* Clear all slots or parts thereof that are below 2GB */ + while (slot && slot->base_gfn < pages_2g) { + len = min_t(u64, slot->npages, pages_2g - slot->base_gfn) * PAGE_SIZE; + s390_uv_destroy_range(kvm->mm, slot->userspace_addr, slot->userspace_addr + len); + /* Take the next memslot */ + slot = gfn_to_memslot(kvm, slot->base_gfn + slot->npages); + } + + srcu_read_unlock(&kvm->srcu, srcu_idx); +} + +static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + struct uv_cb_destroy_fast uvcb = { + .header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST, + .header.len = sizeof(uvcb), + .handle = kvm_s390_pv_get_handle(kvm), + }; + int cc; + + cc = uv_call_sched(0, (u64)&uvcb); + if (rc) + *rc = uvcb.header.rc; + if (rrc) + *rrc = uvcb.header.rrc; WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); + KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x", + uvcb.header.rc, uvcb.header.rrc); + WARN_ONCE(cc, "protvirt destroy vm fast failed handle %llx rc %x rrc %x", + kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc); + /* Inteded memory leak on "impossible" error */ + if (!cc) + kvm_s390_pv_dealloc_vm(kvm); + return cc ? -EIO : 0; +} + +static inline bool is_destroy_fast_available(void) +{ + return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list); +} + +/** + * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown. + * @kvm: the VM + * @rc: return value for the RC field of the UVCB + * @rrc: return value for the RRC field of the UVCB + * + * Set aside the protected VM for a subsequent teardown. The VM will be able + * to continue immediately as a non-secure VM, and the information needed to + * properly tear down the protected VM is set aside. If another protected VM + * was already set aside without starting its teardown, this function will + * fail. + * The CPUs of the protected VM need to be destroyed beforehand. + * + * Context: kvm->lock needs to be held + * + * Return: 0 in case of success, -EINVAL if another protected VM was already set + * aside, -ENOMEM if the system ran out of memory. + */ +int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + struct pv_vm_to_be_destroyed *priv; + int res = 0; + + lockdep_assert_held(&kvm->lock); /* - * if the mm still has a mapping, make all its pages accessible - * before destroying the guest + * If another protected VM was already prepared for teardown, refuse. + * A normal deinitialization has to be performed instead. */ - if (mmget_not_zero(kvm->mm)) { - s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE); - mmput(kvm->mm); + if (kvm->arch.pv.set_aside) + return -EINVAL; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + if (is_destroy_fast_available()) { + res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc); + } else { + priv->stor_var = kvm->arch.pv.stor_var; + priv->stor_base = kvm->arch.pv.stor_base; + priv->handle = kvm_s390_pv_get_handle(kvm); + priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table; + WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); + if (s390_replace_asce(kvm->arch.gmap)) + res = -ENOMEM; } + if (res) { + kfree(priv); + return res; + } + + kvm_s390_destroy_lower_2g(kvm); + kvm_s390_clear_pv_state(kvm); + kvm->arch.pv.set_aside = priv; + + *rc = UVC_RC_EXECUTED; + *rrc = 42; + return 0; +} + +/** + * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM + * @kvm: the KVM whose protected VM needs to be deinitialized + * @rc: the RC code of the UVC + * @rrc: the RRC code of the UVC + * + * Deinitialize the current protected VM. This function will destroy and + * cleanup the current protected VM, but it will not cleanup the guest + * memory. This function should only be called when the protected VM has + * just been created and therefore does not have any guest memory, or when + * the caller cleans up the guest memory separately. + * + * This function should not fail, but if it does, the donated memory must + * not be freed. + * + * Context: kvm->lock needs to be held + * + * Return: 0 in case of success, otherwise -EIO + */ +int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + int cc; + + cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), + UVC_CMD_DESTROY_SEC_CONF, rc, rrc); + WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); if (!cc) { atomic_dec(&kvm->mm->context.protected_count); kvm_s390_pv_dealloc_vm(kvm); @@ -189,11 +383,137 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc) return cc ? -EIO : 0; } +/** + * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated + * with a specific KVM. + * @kvm: the KVM to be cleaned up + * @rc: the RC code of the first failing UVC + * @rrc: the RRC code of the first failing UVC + * + * This function will clean up all protected VMs associated with a KVM. + * This includes the active one, the one prepared for deinitialization with + * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list. + * + * Context: kvm->lock needs to be held unless being called from + * kvm_arch_destroy_vm. + * + * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO + */ +int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + struct pv_vm_to_be_destroyed *cur; + bool need_zap = false; + u16 _rc, _rrc; + int cc = 0; + + /* Make sure the counter does not reach 0 before calling s390_uv_destroy_range */ + atomic_inc(&kvm->mm->context.protected_count); + + *rc = 1; + /* If the current VM is protected, destroy it */ + if (kvm_s390_pv_get_handle(kvm)) { + cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc); + need_zap = true; + } + + /* If a previous protected VM was set aside, put it in the need_cleanup list */ + if (kvm->arch.pv.set_aside) { + list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup); + kvm->arch.pv.set_aside = NULL; + } + + /* Cleanup all protected VMs in the need_cleanup list */ + while (!list_empty(&kvm->arch.pv.need_cleanup)) { + cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list); + need_zap = true; + if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) { + cc = 1; + /* + * Only return the first error rc and rrc, so make + * sure it is not overwritten. All destroys will + * additionally be reported via KVM_UV_EVENT(). + */ + if (*rc == UVC_RC_EXECUTED) { + *rc = _rc; + *rrc = _rrc; + } + } + list_del(&cur->list); + kfree(cur); + } + + /* + * If the mm still has a mapping, try to mark all its pages as + * accessible. The counter should not reach zero before this + * cleanup has been performed. + */ + if (need_zap && mmget_not_zero(kvm->mm)) { + s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE); + mmput(kvm->mm); + } + + /* Now the counter can safely reach 0 */ + atomic_dec(&kvm->mm->context.protected_count); + return cc ? -EIO : 0; +} + +/** + * kvm_s390_pv_deinit_aside_vm - Teardown a previously set aside protected VM. + * @kvm: the VM previously associated with the protected VM + * @rc: return value for the RC field of the UVCB + * @rrc: return value for the RRC field of the UVCB + * + * Tear down the protected VM that had been previously prepared for teardown + * using kvm_s390_pv_set_aside_vm. Ideally this should be called by + * userspace asynchronously from a separate thread. + * + * Context: kvm->lock must not be held. + * + * Return: 0 in case of success, -EINVAL if no protected VM had been + * prepared for asynchronous teardowm, -EIO in case of other errors. + */ +int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + struct pv_vm_to_be_destroyed *p; + int ret = 0; + + lockdep_assert_not_held(&kvm->lock); + mutex_lock(&kvm->lock); + p = kvm->arch.pv.set_aside; + kvm->arch.pv.set_aside = NULL; + mutex_unlock(&kvm->lock); + if (!p) + return -EINVAL; + + /* When a fatal signal is received, stop immediately */ + if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX)) + goto done; + if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc)) + ret = -EIO; + kfree(p); + p = NULL; +done: + /* + * p is not NULL if we aborted because of a fatal signal, in which + * case queue the leftover for later cleanup. + */ + if (p) { + mutex_lock(&kvm->lock); + list_add(&p->list, &kvm->arch.pv.need_cleanup); + mutex_unlock(&kvm->lock); + /* Did not finish, but pretend things went well */ + *rc = UVC_RC_EXECUTED; + *rrc = 42; + } + return ret; +} + static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription, struct mm_struct *mm) { struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier); u16 dummy; + int r; /* * No locking is needed since this is the last thread of the last user of this @@ -202,7 +522,9 @@ static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription, * unregistered. This means that if this notifier runs, then the * struct kvm is still valid. */ - kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); + r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); + if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm)) + kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy); } static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = { @@ -226,8 +548,9 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */ uvcb.guest_stor_len = kvm->arch.pv.guest_len; uvcb.guest_asce = kvm->arch.gmap->asce; - uvcb.guest_sca = (unsigned long)kvm->arch.sca; - uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base; + uvcb.guest_sca = virt_to_phys(kvm->arch.sca); + uvcb.conf_base_stor_origin = + virt_to_phys((void *)kvm->arch.pv.stor_base); uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var; cc = uv_call_sched(0, (u64)&uvcb); diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 94138f8f0c1c..b6a0219e470a 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -546,8 +546,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI)) scb_s->eca |= scb_o->eca & ECA_CEI; /* Epoch Extension */ - if (test_kvm_facility(vcpu->kvm, 139)) + if (test_kvm_facility(vcpu->kvm, 139)) { scb_s->ecd |= scb_o->ecd & ECD_MEF; + scb_s->epdx = scb_o->epdx; + } /* etoken */ if (test_kvm_facility(vcpu->kvm, 156)) @@ -654,7 +656,7 @@ static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa) page = gfn_to_page(kvm, gpa_to_gfn(gpa)); if (is_error_page(page)) return -EINVAL; - *hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK); + *hpa = (hpa_t)page_to_phys(page) + (gpa & ~PAGE_MASK); return 0; } @@ -869,7 +871,7 @@ static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, WARN_ON_ONCE(rc); return 1; } - vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa; + vsie_page->scb_o = phys_to_virt(hpa); return 0; } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 02d15c8dc92e..74e1d873dce0 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -72,7 +72,7 @@ static struct gmap *gmap_alloc(unsigned long limit) goto out_free; page->index = 0; list_add(&page->lru, &gmap->crst_list); - table = (unsigned long *) page_to_phys(page); + table = page_to_virt(page); crst_table_init(table, etype); gmap->table = table; gmap->asce = atype | _ASCE_TABLE_LENGTH | @@ -311,12 +311,12 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; - new = (unsigned long *) page_to_phys(page); + new = page_to_virt(page); crst_table_init(new, init); spin_lock(&gmap->guest_table_lock); if (*table & _REGION_ENTRY_INVALID) { list_add(&page->lru, &gmap->crst_list); - *table = (unsigned long) new | _REGION_ENTRY_LENGTH | + *table = __pa(new) | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); page->index = gaddr; page = NULL; @@ -336,12 +336,11 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, static unsigned long __gmap_segment_gaddr(unsigned long *entry) { struct page *page; - unsigned long offset, mask; + unsigned long offset; offset = (unsigned long) entry / sizeof(unsigned long); offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; - mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); - page = virt_to_page((void *)((unsigned long) entry & mask)); + page = pmd_pgtable_page((pmd_t *) entry); return page->index + offset; } @@ -557,7 +556,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, gaddr & _REGION1_MASK)) return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = __va(*table & _REGION_ENTRY_ORIGIN); } if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT; @@ -565,7 +564,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, gaddr & _REGION2_MASK)) return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = __va(*table & _REGION_ENTRY_ORIGIN); } if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT; @@ -573,7 +572,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, gaddr & _REGION3_MASK)) return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = __va(*table & _REGION_ENTRY_ORIGIN); } table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; /* Walk the parent mm page table */ @@ -813,7 +812,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, break; if (*table & _REGION_ENTRY_INVALID) return NULL; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = __va(*table & _REGION_ENTRY_ORIGIN); fallthrough; case _ASCE_TYPE_REGION2: table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT; @@ -821,7 +820,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, break; if (*table & _REGION_ENTRY_INVALID) return NULL; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = __va(*table & _REGION_ENTRY_ORIGIN); fallthrough; case _ASCE_TYPE_REGION3: table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT; @@ -829,7 +828,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, break; if (*table & _REGION_ENTRY_INVALID) return NULL; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = __va(*table & _REGION_ENTRY_ORIGIN); fallthrough; case _ASCE_TYPE_SEGMENT: table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; @@ -837,7 +836,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, break; if (*table & _REGION_ENTRY_INVALID) return NULL; - table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); + table = __va(*table & _SEGMENT_ENTRY_ORIGIN); table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT; } return table; @@ -1150,7 +1149,7 @@ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val) if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) { address = pte_val(pte) & PAGE_MASK; address += gaddr & ~PAGE_MASK; - *val = *(unsigned long *) address; + *val = *(unsigned long *)__va(address); set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG))); /* Do *NOT* clear the _PAGE_INVALID bit! */ rc = 0; @@ -1335,7 +1334,8 @@ static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr, */ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) { - unsigned long sto, *ste, *pgt; + unsigned long *ste; + phys_addr_t sto, pgt; struct page *page; BUG_ON(!gmap_is_shadow(sg)); @@ -1343,13 +1343,13 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN)) return; gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1); - sto = (unsigned long) (ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT)); + sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT)); gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr); - pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN); + pgt = *ste & _SEGMENT_ENTRY_ORIGIN; *ste = _SEGMENT_ENTRY_EMPTY; - __gmap_unshadow_pgt(sg, raddr, pgt); + __gmap_unshadow_pgt(sg, raddr, __va(pgt)); /* Free page table */ - page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT); + page = phys_to_page(pgt); list_del(&page->lru); page_table_free_pgste(page); } @@ -1365,19 +1365,19 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, unsigned long *sgt) { - unsigned long *pgt; struct page *page; + phys_addr_t pgt; int i; BUG_ON(!gmap_is_shadow(sg)); for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) { if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN)) continue; - pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN); + pgt = sgt[i] & _REGION_ENTRY_ORIGIN; sgt[i] = _SEGMENT_ENTRY_EMPTY; - __gmap_unshadow_pgt(sg, raddr, pgt); + __gmap_unshadow_pgt(sg, raddr, __va(pgt)); /* Free page table */ - page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT); + page = phys_to_page(pgt); list_del(&page->lru); page_table_free_pgste(page); } @@ -1392,7 +1392,8 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, */ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) { - unsigned long r3o, *r3e, *sgt; + unsigned long r3o, *r3e; + phys_addr_t sgt; struct page *page; BUG_ON(!gmap_is_shadow(sg)); @@ -1401,12 +1402,12 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) return; gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1); r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT)); - gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr); - sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN); + gmap_idte_one(__pa(r3o) | _ASCE_TYPE_REGION3, raddr); + sgt = *r3e & _REGION_ENTRY_ORIGIN; *r3e = _REGION3_ENTRY_EMPTY; - __gmap_unshadow_sgt(sg, raddr, sgt); + __gmap_unshadow_sgt(sg, raddr, __va(sgt)); /* Free segment table */ - page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT); + page = phys_to_page(sgt); list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1422,19 +1423,19 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, unsigned long *r3t) { - unsigned long *sgt; struct page *page; + phys_addr_t sgt; int i; BUG_ON(!gmap_is_shadow(sg)); for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) { if (!(r3t[i] & _REGION_ENTRY_ORIGIN)) continue; - sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN); + sgt = r3t[i] & _REGION_ENTRY_ORIGIN; r3t[i] = _REGION3_ENTRY_EMPTY; - __gmap_unshadow_sgt(sg, raddr, sgt); + __gmap_unshadow_sgt(sg, raddr, __va(sgt)); /* Free segment table */ - page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT); + page = phys_to_page(sgt); list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1449,7 +1450,8 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, */ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) { - unsigned long r2o, *r2e, *r3t; + unsigned long r2o, *r2e; + phys_addr_t r3t; struct page *page; BUG_ON(!gmap_is_shadow(sg)); @@ -1458,12 +1460,12 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) return; gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1); r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT)); - gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr); - r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN); + gmap_idte_one(__pa(r2o) | _ASCE_TYPE_REGION2, raddr); + r3t = *r2e & _REGION_ENTRY_ORIGIN; *r2e = _REGION2_ENTRY_EMPTY; - __gmap_unshadow_r3t(sg, raddr, r3t); + __gmap_unshadow_r3t(sg, raddr, __va(r3t)); /* Free region 3 table */ - page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT); + page = phys_to_page(r3t); list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1479,7 +1481,7 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, unsigned long *r2t) { - unsigned long *r3t; + phys_addr_t r3t; struct page *page; int i; @@ -1487,11 +1489,11 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) { if (!(r2t[i] & _REGION_ENTRY_ORIGIN)) continue; - r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN); + r3t = r2t[i] & _REGION_ENTRY_ORIGIN; r2t[i] = _REGION2_ENTRY_EMPTY; - __gmap_unshadow_r3t(sg, raddr, r3t); + __gmap_unshadow_r3t(sg, raddr, __va(r3t)); /* Free region 3 table */ - page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT); + page = phys_to_page(r3t); list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1506,8 +1508,9 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, */ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) { - unsigned long r1o, *r1e, *r2t; + unsigned long r1o, *r1e; struct page *page; + phys_addr_t r2t; BUG_ON(!gmap_is_shadow(sg)); r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */ @@ -1515,12 +1518,12 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) return; gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1); r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT)); - gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr); - r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN); + gmap_idte_one(__pa(r1o) | _ASCE_TYPE_REGION1, raddr); + r2t = *r1e & _REGION_ENTRY_ORIGIN; *r1e = _REGION1_ENTRY_EMPTY; - __gmap_unshadow_r2t(sg, raddr, r2t); + __gmap_unshadow_r2t(sg, raddr, __va(r2t)); /* Free region 2 table */ - page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT); + page = phys_to_page(r2t); list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1536,22 +1539,23 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, unsigned long *r1t) { - unsigned long asce, *r2t; + unsigned long asce; struct page *page; + phys_addr_t r2t; int i; BUG_ON(!gmap_is_shadow(sg)); - asce = (unsigned long) r1t | _ASCE_TYPE_REGION1; + asce = __pa(r1t) | _ASCE_TYPE_REGION1; for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) { if (!(r1t[i] & _REGION_ENTRY_ORIGIN)) continue; - r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN); - __gmap_unshadow_r2t(sg, raddr, r2t); + r2t = r1t[i] & _REGION_ENTRY_ORIGIN; + __gmap_unshadow_r2t(sg, raddr, __va(r2t)); /* Clear entry and flush translation r1t -> r2t */ gmap_idte_one(asce, raddr); r1t[i] = _REGION1_ENTRY_EMPTY; /* Free region 2 table */ - page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT); + page = phys_to_page(r2t); list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1573,7 +1577,7 @@ static void gmap_unshadow(struct gmap *sg) sg->removed = 1; gmap_call_notifier(sg, 0, -1UL); gmap_flush_tlb(sg); - table = (unsigned long *)(sg->asce & _ASCE_ORIGIN); + table = __va(sg->asce & _ASCE_ORIGIN); switch (sg->asce & _ASCE_TYPE_MASK) { case _ASCE_TYPE_REGION1: __gmap_unshadow_r1t(sg, 0, table); @@ -1748,7 +1752,8 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, int fake) { unsigned long raddr, origin, offset, len; - unsigned long *s_r2t, *table; + unsigned long *table; + phys_addr_t s_r2t; struct page *page; int rc; @@ -1760,7 +1765,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, page->index = r2t & _REGION_ENTRY_ORIGIN; if (fake) page->index |= GMAP_SHADOW_FAKE_TABLE; - s_r2t = (unsigned long *) page_to_phys(page); + s_r2t = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */ @@ -1775,9 +1780,9 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, rc = -EAGAIN; /* Race with shadow */ goto out_free; } - crst_table_init(s_r2t, _REGION2_ENTRY_EMPTY); + crst_table_init(__va(s_r2t), _REGION2_ENTRY_EMPTY); /* mark as invalid as long as the parent table is not protected */ - *table = (unsigned long) s_r2t | _REGION_ENTRY_LENGTH | + *table = s_r2t | _REGION_ENTRY_LENGTH | _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= (r2t & _REGION_ENTRY_PROTECT); @@ -1798,8 +1803,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, spin_lock(&sg->guest_table_lock); if (!rc) { table = gmap_table_walk(sg, saddr, 4); - if (!table || (*table & _REGION_ENTRY_ORIGIN) != - (unsigned long) s_r2t) + if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r2t) rc = -EAGAIN; /* Race with unshadow */ else *table &= ~_REGION_ENTRY_INVALID; @@ -1832,7 +1836,8 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, int fake) { unsigned long raddr, origin, offset, len; - unsigned long *s_r3t, *table; + unsigned long *table; + phys_addr_t s_r3t; struct page *page; int rc; @@ -1844,7 +1849,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, page->index = r3t & _REGION_ENTRY_ORIGIN; if (fake) page->index |= GMAP_SHADOW_FAKE_TABLE; - s_r3t = (unsigned long *) page_to_phys(page); + s_r3t = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */ @@ -1859,9 +1864,9 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, rc = -EAGAIN; /* Race with shadow */ goto out_free; } - crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY); + crst_table_init(__va(s_r3t), _REGION3_ENTRY_EMPTY); /* mark as invalid as long as the parent table is not protected */ - *table = (unsigned long) s_r3t | _REGION_ENTRY_LENGTH | + *table = s_r3t | _REGION_ENTRY_LENGTH | _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= (r3t & _REGION_ENTRY_PROTECT); @@ -1882,8 +1887,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, spin_lock(&sg->guest_table_lock); if (!rc) { table = gmap_table_walk(sg, saddr, 3); - if (!table || (*table & _REGION_ENTRY_ORIGIN) != - (unsigned long) s_r3t) + if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r3t) rc = -EAGAIN; /* Race with unshadow */ else *table &= ~_REGION_ENTRY_INVALID; @@ -1916,7 +1920,8 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, int fake) { unsigned long raddr, origin, offset, len; - unsigned long *s_sgt, *table; + unsigned long *table; + phys_addr_t s_sgt; struct page *page; int rc; @@ -1928,7 +1933,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, page->index = sgt & _REGION_ENTRY_ORIGIN; if (fake) page->index |= GMAP_SHADOW_FAKE_TABLE; - s_sgt = (unsigned long *) page_to_phys(page); + s_sgt = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */ @@ -1943,9 +1948,9 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, rc = -EAGAIN; /* Race with shadow */ goto out_free; } - crst_table_init(s_sgt, _SEGMENT_ENTRY_EMPTY); + crst_table_init(__va(s_sgt), _SEGMENT_ENTRY_EMPTY); /* mark as invalid as long as the parent table is not protected */ - *table = (unsigned long) s_sgt | _REGION_ENTRY_LENGTH | + *table = s_sgt | _REGION_ENTRY_LENGTH | _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= sgt & _REGION_ENTRY_PROTECT; @@ -1966,8 +1971,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, spin_lock(&sg->guest_table_lock); if (!rc) { table = gmap_table_walk(sg, saddr, 2); - if (!table || (*table & _REGION_ENTRY_ORIGIN) != - (unsigned long) s_sgt) + if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_sgt) rc = -EAGAIN; /* Race with unshadow */ else *table &= ~_REGION_ENTRY_INVALID; @@ -2040,8 +2044,9 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, int fake) { unsigned long raddr, origin; - unsigned long *s_pgt, *table; + unsigned long *table; struct page *page; + phys_addr_t s_pgt; int rc; BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE)); @@ -2052,7 +2057,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, page->index = pgt & _SEGMENT_ENTRY_ORIGIN; if (fake) page->index |= GMAP_SHADOW_FAKE_TABLE; - s_pgt = (unsigned long *) page_to_phys(page); + s_pgt = page_to_phys(page); /* Install shadow page table */ spin_lock(&sg->guest_table_lock); table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ @@ -2085,8 +2090,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, spin_lock(&sg->guest_table_lock); if (!rc) { table = gmap_table_walk(sg, saddr, 1); - if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != - (unsigned long) s_pgt) + if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != s_pgt) rc = -EAGAIN; /* Race with unshadow */ else *table &= ~_SEGMENT_ENTRY_INVALID; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 97d66a3e60fb..30ab55f868f6 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -31,6 +31,7 @@ #include <linux/cma.h> #include <linux/gfp.h> #include <linux/dma-direct.h> +#include <linux/percpu.h> #include <asm/processor.h> #include <linux/uaccess.h> #include <asm/pgalloc.h> @@ -140,25 +141,25 @@ void mark_rodata_ro(void) debug_checkwx(); } -int set_memory_encrypted(unsigned long addr, int numpages) +int set_memory_encrypted(unsigned long vaddr, int numpages) { int i; /* make specified pages unshared, (swiotlb, dma_free) */ for (i = 0; i < numpages; ++i) { - uv_remove_shared(addr); - addr += PAGE_SIZE; + uv_remove_shared(virt_to_phys((void *)vaddr)); + vaddr += PAGE_SIZE; } return 0; } -int set_memory_decrypted(unsigned long addr, int numpages) +int set_memory_decrypted(unsigned long vaddr, int numpages) { int i; /* make specified pages shared (swiotlb, dma_alloca) */ for (i = 0; i < numpages; ++i) { - uv_set_shared(addr); - addr += PAGE_SIZE; + uv_set_shared(virt_to_phys((void *)vaddr)); + vaddr += PAGE_SIZE; } return 0; } @@ -207,9 +208,6 @@ void free_initmem(void) __set_memory((unsigned long)_sinittext, (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT, SET_MEMORY_RW | SET_MEMORY_NX); - free_reserved_area(sclp_early_sccb, - sclp_early_sccb + EXT_SCCB_READ_SCP, - POISON_FREE_INITMEM, "unused early sccb"); free_initmem_default(POISON_FREE_INITMEM); } @@ -222,6 +220,41 @@ unsigned long memory_block_size_bytes(void) return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp.rzm); } +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); + +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) +{ + return LOCAL_DISTANCE; +} + +static int __init pcpu_cpu_to_node(int cpu) +{ + return 0; +} + +void __init setup_per_cpu_areas(void) +{ + unsigned long delta; + unsigned int cpu; + int rc; + + /* + * Always reserve area for module percpu variables. That's + * what the legacy allocator did. + */ + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, + pcpu_cpu_distance, + pcpu_cpu_to_node); + if (rc < 0) + panic("Failed to initialize percpu areas."); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; +} + #ifdef CONFIG_MEMORY_HOTPLUG #ifdef CONFIG_CMA diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 1571cdcb0c50..4824d1cd33d8 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -128,7 +128,7 @@ int memcpy_real(void *dest, unsigned long src, size_t count) kvec.iov_base = dest; kvec.iov_len = count; - iov_iter_kvec(&iter, WRITE, &kvec, 1, count); + iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, count); if (memcpy_real_iter(&iter, src, count) < count) return -EFAULT; return 0; diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index d5ea09d78938..1e2ea706aa22 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -58,17 +58,6 @@ void __init cmma_init(void) cmma_flag = 2; } -static inline unsigned char get_page_state(struct page *page) -{ - unsigned char state; - - asm volatile(" .insn rrf,0xb9ab0000,%0,%1,%2,0" - : "=&d" (state) - : "a" (page_to_phys(page)), - "i" (ESSA_GET_STATE)); - return state & 0x3f; -} - static inline void set_page_unused(struct page *page, int order) { int i, rc; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 73cdc5539384..ef38b1514c77 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -116,20 +116,20 @@ EXPORT_SYMBOL_GPL(pci_proc_domain); /* Modify PCI: Register I/O address translation parameters */ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas, - u64 base, u64 limit, u64 iota) + u64 base, u64 limit, u64 iota, u8 *status) { u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT); struct zpci_fib fib = {0}; - u8 cc, status; + u8 cc; WARN_ON_ONCE(iota & 0x3fff); fib.pba = base; fib.pal = limit; fib.iota = iota | ZPCI_IOTA_RTTO_FLAG; fib.gd = zdev->gisa; - cc = zpci_mod_fc(req, &fib, &status); + cc = zpci_mod_fc(req, &fib, status); if (cc) - zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status); + zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, *status); return cc; } EXPORT_SYMBOL_GPL(zpci_register_ioat); @@ -764,6 +764,7 @@ EXPORT_SYMBOL_GPL(zpci_disable_device); */ int zpci_hot_reset_device(struct zpci_dev *zdev) { + u8 status; int rc; zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh); @@ -787,7 +788,7 @@ int zpci_hot_reset_device(struct zpci_dev *zdev) if (zdev->dma_table) rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table)); + virt_to_phys(zdev->dma_table), &status); else rc = zpci_dma_init_device(zdev); if (rc) { @@ -995,7 +996,7 @@ void zpci_release_device(struct kref *kref) break; } zpci_dbg(3, "rem fid:%x\n", zdev->fid); - kfree(zdev); + kfree_rcu(zdev, rcu); } int zpci_report_error(struct pci_dev *pdev, diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 227cf0a62800..ea478d11fbd1 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -63,37 +63,55 @@ static void dma_free_page_table(void *table) kmem_cache_free(dma_page_table_cache, table); } -static unsigned long *dma_get_seg_table_origin(unsigned long *entry) +static unsigned long *dma_get_seg_table_origin(unsigned long *rtep) { + unsigned long old_rte, rte; unsigned long *sto; - if (reg_entry_isvalid(*entry)) - sto = get_rt_sto(*entry); - else { + rte = READ_ONCE(*rtep); + if (reg_entry_isvalid(rte)) { + sto = get_rt_sto(rte); + } else { sto = dma_alloc_cpu_table(); if (!sto) return NULL; - set_rt_sto(entry, virt_to_phys(sto)); - validate_rt_entry(entry); - entry_clr_protected(entry); + set_rt_sto(&rte, virt_to_phys(sto)); + validate_rt_entry(&rte); + entry_clr_protected(&rte); + + old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte); + if (old_rte != ZPCI_TABLE_INVALID) { + /* Somone else was faster, use theirs */ + dma_free_cpu_table(sto); + sto = get_rt_sto(old_rte); + } } return sto; } -static unsigned long *dma_get_page_table_origin(unsigned long *entry) +static unsigned long *dma_get_page_table_origin(unsigned long *step) { + unsigned long old_ste, ste; unsigned long *pto; - if (reg_entry_isvalid(*entry)) - pto = get_st_pto(*entry); - else { + ste = READ_ONCE(*step); + if (reg_entry_isvalid(ste)) { + pto = get_st_pto(ste); + } else { pto = dma_alloc_page_table(); if (!pto) return NULL; - set_st_pto(entry, virt_to_phys(pto)); - validate_st_entry(entry); - entry_clr_protected(entry); + set_st_pto(&ste, virt_to_phys(pto)); + validate_st_entry(&ste); + entry_clr_protected(&ste); + + old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste); + if (old_ste != ZPCI_TABLE_INVALID) { + /* Somone else was faster, use theirs */ + dma_free_page_table(pto); + pto = get_st_pto(old_ste); + } } return pto; } @@ -117,19 +135,24 @@ unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) return &pto[px]; } -void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags) +void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags) { + unsigned long pte; + + pte = READ_ONCE(*ptep); if (flags & ZPCI_PTE_INVALID) { - invalidate_pt_entry(entry); + invalidate_pt_entry(&pte); } else { - set_pt_pfaa(entry, page_addr); - validate_pt_entry(entry); + set_pt_pfaa(&pte, page_addr); + validate_pt_entry(&pte); } if (flags & ZPCI_TABLE_PROTECTED) - entry_set_protected(entry); + entry_set_protected(&pte); else - entry_clr_protected(entry); + entry_clr_protected(&pte); + + xchg(ptep, pte); } static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, @@ -137,18 +160,14 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, { unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; phys_addr_t page_addr = (pa & PAGE_MASK); - unsigned long irq_flags; unsigned long *entry; int i, rc = 0; if (!nr_pages) return -EINVAL; - spin_lock_irqsave(&zdev->dma_table_lock, irq_flags); - if (!zdev->dma_table) { - rc = -EINVAL; - goto out_unlock; - } + if (!zdev->dma_table) + return -EINVAL; for (i = 0; i < nr_pages; i++) { entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); @@ -173,8 +192,6 @@ undo_cpu_trans: dma_update_cpu_trans(entry, page_addr, flags); } } -out_unlock: - spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); return rc; } @@ -547,6 +564,7 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int zpci_dma_init_device(struct zpci_dev *zdev) { + u8 status; int rc; /* @@ -557,7 +575,6 @@ int zpci_dma_init_device(struct zpci_dev *zdev) WARN_ON(zdev->s390_domain); spin_lock_init(&zdev->iommu_bitmap_lock); - spin_lock_init(&zdev->dma_table_lock); zdev->dma_table = dma_alloc_cpu_table(); if (!zdev->dma_table) { @@ -598,7 +615,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) } if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table))) { + virt_to_phys(zdev->dma_table), &status)) { rc = -EIO; goto free_bitmap; } diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index a2b42a63a53b..4ab0cf829999 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -132,7 +132,7 @@ static int zpci_clear_irq(struct zpci_dev *zdev) static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *dest, bool force) { - struct msi_desc *entry = irq_get_msi_desc(data->irq); + struct msi_desc *entry = irq_data_get_msi_desc(data); struct msi_msg msg = entry->msg; int cpu_addr = smp_cpu_get_cpu_address(cpumask_first(dest)); |