aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorMarc Zyngier <maz@kernel.org>2020-12-09 10:00:24 +0000
committerMarc Zyngier <maz@kernel.org>2020-12-09 10:00:24 +0000
commit3a514592b698588326924625b6948a10c35fadd5 (patch)
tree1209f5cb0a780f87c41e3299b22776a221766892 /arch
parent17f84520cb8fcaf475c96c3ee90dd97b55a63669 (diff)
parent0cc519f85a527e1c5ad5a7f182105fe614e9ff80 (diff)
Merge remote-tracking branch 'origin/kvm-arm64/psci-relay' into kvmarm-master/next
Signed-off-by: Marc Zyngier <maz@kernel.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/arm64/Kconfig25
-rw-r--r--arch/arm64/include/asm/alternative-macros.h217
-rw-r--r--arch/arm64/include/asm/alternative.h267
-rw-r--r--arch/arm64/include/asm/asm-uaccess.h31
-rw-r--r--arch/arm64/include/asm/cpucaps.h6
-rw-r--r--arch/arm64/include/asm/cpufeature.h15
-rw-r--r--arch/arm64/include/asm/el2_setup.h181
-rw-r--r--arch/arm64/include/asm/exec.h1
-rw-r--r--arch/arm64/include/asm/futex.h8
-rw-r--r--arch/arm64/include/asm/insn.h3
-rw-r--r--arch/arm64/include/asm/kvm_arm.h1
-rw-r--r--arch/arm64/include/asm/kvm_asm.h10
-rw-r--r--arch/arm64/include/asm/kvm_host.h10
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h4
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h24
-rw-r--r--arch/arm64/include/asm/percpu.h6
-rw-r--r--arch/arm64/include/asm/processor.h4
-rw-r--r--arch/arm64/include/asm/ptrace.h8
-rw-r--r--arch/arm64/include/asm/rwonce.h73
-rw-r--r--arch/arm64/include/asm/sections.h1
-rw-r--r--arch/arm64/include/asm/smp.h4
-rw-r--r--arch/arm64/include/asm/sysreg.h22
-rw-r--r--arch/arm64/include/asm/thread_info.h10
-rw-r--r--arch/arm64/include/asm/uaccess.h174
-rw-r--r--arch/arm64/include/asm/virt.h26
-rw-r--r--arch/arm64/kernel/alternative.c7
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c4
-rw-r--r--arch/arm64/kernel/asm-offsets.c8
-rw-r--r--arch/arm64/kernel/cpufeature.c68
-rw-r--r--arch/arm64/kernel/entry.S19
-rw-r--r--arch/arm64/kernel/head.S180
-rw-r--r--arch/arm64/kernel/image-vars.h6
-rw-r--r--arch/arm64/kernel/process.c29
-rw-r--r--arch/arm64/kernel/proton-pack.c5
-rw-r--r--arch/arm64/kernel/sdei.c33
-rw-r--r--arch/arm64/kernel/setup.c2
-rw-r--r--arch/arm64/kernel/signal.c3
-rw-r--r--arch/arm64/kernel/sleep.S2
-rw-r--r--arch/arm64/kernel/suspend.c1
-rw-r--r--arch/arm64/kernel/vdso/Makefile2
-rw-r--r--arch/arm64/kernel/vdso32/Makefile2
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S12
-rw-r--r--arch/arm64/kvm/arm.c139
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/trap_handler.h18
-rw-r--r--arch/arm64/kvm/hyp/nvhe/Makefile3
-rw-r--r--arch/arm64/kvm/hyp/nvhe/host.S47
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-init.S152
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c45
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-smp.c40
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp.lds.S1
-rw-r--r--arch/arm64/kvm/hyp/nvhe/psci-relay.c324
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c5
-rw-r--r--arch/arm64/kvm/va_layout.c30
-rw-r--r--arch/arm64/lib/clear_user.S8
-rw-r--r--arch/arm64/lib/copy_from_user.S8
-rw-r--r--arch/arm64/lib/copy_in_user.S16
-rw-r--r--arch/arm64/lib/copy_to_user.S8
-rw-r--r--arch/arm64/lib/mte.S6
-rw-r--r--arch/arm64/lib/uaccess_flushcache.c4
-rw-r--r--arch/arm64/mm/fault.c5
-rw-r--r--arch/arm64/mm/proc.S2
61 files changed, 1614 insertions, 761 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1515f6f153a0..8d13b9135634 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -195,7 +195,6 @@ config ARM64
select PCI_SYSCALL if PCI
select POWER_RESET
select POWER_SUPPLY
- select SET_FS
select SPARSE_IRQ
select SWIOTLB
select SYSCTL_EXCEPTION_TRACE
@@ -1388,6 +1387,9 @@ config ARM64_PAN
The feature is detected at runtime, and will remain as a 'nop'
instruction if the cpu does not implement the feature.
+config AS_HAS_LDAPR
+ def_bool $(as-instr,.arch_extension rcpc)
+
config ARM64_LSE_ATOMICS
bool
default ARM64_USE_LSE_ATOMICS
@@ -1425,27 +1427,6 @@ endmenu
menu "ARMv8.2 architectural features"
-config ARM64_UAO
- bool "Enable support for User Access Override (UAO)"
- default y
- help
- User Access Override (UAO; part of the ARMv8.2 Extensions)
- causes the 'unprivileged' variant of the load/store instructions to
- be overridden to be privileged.
-
- This option changes get_user() and friends to use the 'unprivileged'
- variant of the load/store instructions. This ensures that user-space
- really did have access to the supplied memory. When addr_limit is
- set to kernel memory the UAO bit will be set, allowing privileged
- access to kernel memory.
-
- Choosing this option will cause copy_to_user() et al to use user-space
- memory permissions.
-
- The feature is detected at runtime, the kernel will use the
- regular load/store instructions if the cpu does not implement the
- feature.
-
config ARM64_PMEM
bool "Enable support for persistent memory"
select ARCH_HAS_PMEM_API
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
new file mode 100644
index 000000000000..5df500dcc627
--- /dev/null
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ALTERNATIVE_MACROS_H
+#define __ASM_ALTERNATIVE_MACROS_H
+
+#include <asm/cpucaps.h>
+
+#define ARM64_CB_PATCH ARM64_NCAPS
+
+/* A64 instructions are always 32 bits. */
+#define AARCH64_INSN_SIZE 4
+
+#ifndef __ASSEMBLY__
+
+#include <linux/stringify.h>
+
+#define ALTINSTR_ENTRY(feature) \
+ " .word 661b - .\n" /* label */ \
+ " .word 663f - .\n" /* new instruction */ \
+ " .hword " __stringify(feature) "\n" /* feature bit */ \
+ " .byte 662b-661b\n" /* source len */ \
+ " .byte 664f-663f\n" /* replacement len */
+
+#define ALTINSTR_ENTRY_CB(feature, cb) \
+ " .word 661b - .\n" /* label */ \
+ " .word " __stringify(cb) "- .\n" /* callback */ \
+ " .hword " __stringify(feature) "\n" /* feature bit */ \
+ " .byte 662b-661b\n" /* source len */ \
+ " .byte 664f-663f\n" /* replacement len */
+
+/*
+ * alternative assembly primitive:
+ *
+ * If any of these .org directive fail, it means that insn1 and insn2
+ * don't have the same length. This used to be written as
+ *
+ * .if ((664b-663b) != (662b-661b))
+ * .error "Alternatives instruction length mismatch"
+ * .endif
+ *
+ * but most assemblers die if insn1 or insn2 have a .inst. This should
+ * be fixed in a binutils release posterior to 2.25.51.0.2 (anything
+ * containing commit 4e4d08cf7399b606 or c1baaddf8861).
+ *
+ * Alternatives with callbacks do not generate replacement instructions.
+ */
+#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \
+ ".if "__stringify(cfg_enabled)" == 1\n" \
+ "661:\n\t" \
+ oldinstr "\n" \
+ "662:\n" \
+ ".pushsection .altinstructions,\"a\"\n" \
+ ALTINSTR_ENTRY(feature) \
+ ".popsection\n" \
+ ".subsection 1\n" \
+ "663:\n\t" \
+ newinstr "\n" \
+ "664:\n\t" \
+ ".org . - (664b-663b) + (662b-661b)\n\t" \
+ ".org . - (662b-661b) + (664b-663b)\n\t" \
+ ".previous\n" \
+ ".endif\n"
+
+#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \
+ ".if "__stringify(cfg_enabled)" == 1\n" \
+ "661:\n\t" \
+ oldinstr "\n" \
+ "662:\n" \
+ ".pushsection .altinstructions,\"a\"\n" \
+ ALTINSTR_ENTRY_CB(feature, cb) \
+ ".popsection\n" \
+ "663:\n\t" \
+ "664:\n\t" \
+ ".endif\n"
+
+#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \
+ __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
+
+#define ALTERNATIVE_CB(oldinstr, cb) \
+ __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
+#else
+
+#include <asm/assembler.h>
+
+.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
+ .word \orig_offset - .
+ .word \alt_offset - .
+ .hword \feature
+ .byte \orig_len
+ .byte \alt_len
+.endm
+
+.macro alternative_insn insn1, insn2, cap, enable = 1
+ .if \enable
+661: \insn1
+662: .pushsection .altinstructions, "a"
+ altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
+ .popsection
+ .subsection 1
+663: \insn2
+664: .previous
+ .org . - (664b-663b) + (662b-661b)
+ .org . - (662b-661b) + (664b-663b)
+ .endif
+.endm
+
+/*
+ * Alternative sequences
+ *
+ * The code for the case where the capability is not present will be
+ * assembled and linked as normal. There are no restrictions on this
+ * code.
+ *
+ * The code for the case where the capability is present will be
+ * assembled into a special section to be used for dynamic patching.
+ * Code for that case must:
+ *
+ * 1. Be exactly the same length (in bytes) as the default code
+ * sequence.
+ *
+ * 2. Not contain a branch target that is used outside of the
+ * alternative sequence it is defined in (branches into an
+ * alternative sequence are not fixed up).
+ */
+
+/*
+ * Begin an alternative code sequence.
+ */
+.macro alternative_if_not cap
+ .set .Lasm_alt_mode, 0
+ .pushsection .altinstructions, "a"
+ altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
+ .popsection
+661:
+.endm
+
+.macro alternative_if cap
+ .set .Lasm_alt_mode, 1
+ .pushsection .altinstructions, "a"
+ altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
+ .popsection
+ .subsection 1
+ .align 2 /* So GAS knows label 661 is suitably aligned */
+661:
+.endm
+
+.macro alternative_cb cb
+ .set .Lasm_alt_mode, 0
+ .pushsection .altinstructions, "a"
+ altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
+ .popsection
+661:
+.endm
+
+/*
+ * Provide the other half of the alternative code sequence.
+ */
+.macro alternative_else
+662:
+ .if .Lasm_alt_mode==0
+ .subsection 1
+ .else
+ .previous
+ .endif
+663:
+.endm
+
+/*
+ * Complete an alternative code sequence.
+ */
+.macro alternative_endif
+664:
+ .if .Lasm_alt_mode==0
+ .previous
+ .endif
+ .org . - (664b-663b) + (662b-661b)
+ .org . - (662b-661b) + (664b-663b)
+.endm
+
+/*
+ * Callback-based alternative epilogue
+ */
+.macro alternative_cb_end
+662:
+.endm
+
+/*
+ * Provides a trivial alternative or default sequence consisting solely
+ * of NOPs. The number of NOPs is chosen automatically to match the
+ * previous case.
+ */
+.macro alternative_else_nop_endif
+alternative_else
+ nops (662b-661b) / AARCH64_INSN_SIZE
+alternative_endif
+.endm
+
+#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \
+ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
+
+.macro user_alt, label, oldinstr, newinstr, cond
+9999: alternative_insn "\oldinstr", "\newinstr", \cond
+ _asm_extable 9999b, \label
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
+ *
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
+ * N.B. If CONFIG_FOO is specified, but not selected, the whole block
+ * will be omitted, including oldinstr.
+ */
+#define ALTERNATIVE(oldinstr, newinstr, ...) \
+ _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
+
+#endif /* __ASM_ALTERNATIVE_MACROS_H */
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 619db9b4c9d5..a38b92e11811 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -2,17 +2,13 @@
#ifndef __ASM_ALTERNATIVE_H
#define __ASM_ALTERNATIVE_H
-#include <asm/cpucaps.h>
-#include <asm/insn.h>
-
-#define ARM64_CB_PATCH ARM64_NCAPS
+#include <asm/alternative-macros.h>
#ifndef __ASSEMBLY__
#include <linux/init.h>
#include <linux/types.h>
#include <linux/stddef.h>
-#include <linux/stringify.h>
struct alt_instr {
s32 orig_offset; /* offset to original instruction */
@@ -35,264 +31,5 @@ void apply_alternatives_module(void *start, size_t length);
static inline void apply_alternatives_module(void *start, size_t length) { }
#endif
-#define ALTINSTR_ENTRY(feature) \
- " .word 661b - .\n" /* label */ \
- " .word 663f - .\n" /* new instruction */ \
- " .hword " __stringify(feature) "\n" /* feature bit */ \
- " .byte 662b-661b\n" /* source len */ \
- " .byte 664f-663f\n" /* replacement len */
-
-#define ALTINSTR_ENTRY_CB(feature, cb) \
- " .word 661b - .\n" /* label */ \
- " .word " __stringify(cb) "- .\n" /* callback */ \
- " .hword " __stringify(feature) "\n" /* feature bit */ \
- " .byte 662b-661b\n" /* source len */ \
- " .byte 664f-663f\n" /* replacement len */
-
-/*
- * alternative assembly primitive:
- *
- * If any of these .org directive fail, it means that insn1 and insn2
- * don't have the same length. This used to be written as
- *
- * .if ((664b-663b) != (662b-661b))
- * .error "Alternatives instruction length mismatch"
- * .endif
- *
- * but most assemblers die if insn1 or insn2 have a .inst. This should
- * be fixed in a binutils release posterior to 2.25.51.0.2 (anything
- * containing commit 4e4d08cf7399b606 or c1baaddf8861).
- *
- * Alternatives with callbacks do not generate replacement instructions.
- */
-#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \
- ".if "__stringify(cfg_enabled)" == 1\n" \
- "661:\n\t" \
- oldinstr "\n" \
- "662:\n" \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(feature) \
- ".popsection\n" \
- ".subsection 1\n" \
- "663:\n\t" \
- newinstr "\n" \
- "664:\n\t" \
- ".org . - (664b-663b) + (662b-661b)\n\t" \
- ".org . - (662b-661b) + (664b-663b)\n\t" \
- ".previous\n" \
- ".endif\n"
-
-#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \
- ".if "__stringify(cfg_enabled)" == 1\n" \
- "661:\n\t" \
- oldinstr "\n" \
- "662:\n" \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY_CB(feature, cb) \
- ".popsection\n" \
- "663:\n\t" \
- "664:\n\t" \
- ".endif\n"
-
-#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \
- __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
-
-#define ALTERNATIVE_CB(oldinstr, cb) \
- __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
-#else
-
-#include <asm/assembler.h>
-
-.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
- .word \orig_offset - .
- .word \alt_offset - .
- .hword \feature
- .byte \orig_len
- .byte \alt_len
-.endm
-
-.macro alternative_insn insn1, insn2, cap, enable = 1
- .if \enable
-661: \insn1
-662: .pushsection .altinstructions, "a"
- altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
- .popsection
- .subsection 1
-663: \insn2
-664: .previous
- .org . - (664b-663b) + (662b-661b)
- .org . - (662b-661b) + (664b-663b)
- .endif
-.endm
-
-/*
- * Alternative sequences
- *
- * The code for the case where the capability is not present will be
- * assembled and linked as normal. There are no restrictions on this
- * code.
- *
- * The code for the case where the capability is present will be
- * assembled into a special section to be used for dynamic patching.
- * Code for that case must:
- *
- * 1. Be exactly the same length (in bytes) as the default code
- * sequence.
- *
- * 2. Not contain a branch target that is used outside of the
- * alternative sequence it is defined in (branches into an
- * alternative sequence are not fixed up).
- */
-
-/*
- * Begin an alternative code sequence.
- */
-.macro alternative_if_not cap
- .set .Lasm_alt_mode, 0
- .pushsection .altinstructions, "a"
- altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
- .popsection
-661:
-.endm
-
-.macro alternative_if cap
- .set .Lasm_alt_mode, 1
- .pushsection .altinstructions, "a"
- altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
- .popsection
- .subsection 1
- .align 2 /* So GAS knows label 661 is suitably aligned */
-661:
-.endm
-
-.macro alternative_cb cb
- .set .Lasm_alt_mode, 0
- .pushsection .altinstructions, "a"
- altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
- .popsection
-661:
-.endm
-
-/*
- * Provide the other half of the alternative code sequence.
- */
-.macro alternative_else
-662:
- .if .Lasm_alt_mode==0
- .subsection 1
- .else
- .previous
- .endif
-663:
-.endm
-
-/*
- * Complete an alternative code sequence.
- */
-.macro alternative_endif
-664:
- .if .Lasm_alt_mode==0
- .previous
- .endif
- .org . - (664b-663b) + (662b-661b)
- .org . - (662b-661b) + (664b-663b)
-.endm
-
-/*
- * Callback-based alternative epilogue
- */
-.macro alternative_cb_end
-662:
-.endm
-
-/*
- * Provides a trivial alternative or default sequence consisting solely
- * of NOPs. The number of NOPs is chosen automatically to match the
- * previous case.
- */
-.macro alternative_else_nop_endif
-alternative_else
- nops (662b-661b) / AARCH64_INSN_SIZE
-alternative_endif
-.endm
-
-#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \
- alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
-
-.macro user_alt, label, oldinstr, newinstr, cond
-9999: alternative_insn "\oldinstr", "\newinstr", \cond
- _asm_extable 9999b, \label
-.endm
-
-/*
- * Generate the assembly for UAO alternatives with exception table entries.
- * This is complicated as there is no post-increment or pair versions of the
- * unprivileged instructions, and USER() only works for single instructions.
- */
-#ifdef CONFIG_ARM64_UAO
- .macro uao_ldp l, reg1, reg2, addr, post_inc
- alternative_if_not ARM64_HAS_UAO
-8888: ldp \reg1, \reg2, [\addr], \post_inc;
-8889: nop;
- nop;
- alternative_else
- ldtr \reg1, [\addr];
- ldtr \reg2, [\addr, #8];
- add \addr, \addr, \post_inc;
- alternative_endif
-
- _asm_extable 8888b,\l;
- _asm_extable 8889b,\l;
- .endm
-
- .macro uao_stp l, reg1, reg2, addr, post_inc
- alternative_if_not ARM64_HAS_UAO
-8888: stp \reg1, \reg2, [\addr], \post_inc;
-8889: nop;
- nop;
- alternative_else
- sttr \reg1, [\addr];
- sttr \reg2, [\addr, #8];
- add \addr, \addr, \post_inc;
- alternative_endif
-
- _asm_extable 8888b,\l;
- _asm_extable 8889b,\l;
- .endm
-
- .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
- alternative_if_not ARM64_HAS_UAO
-8888: \inst \reg, [\addr], \post_inc;
- nop;
- alternative_else
- \alt_inst \reg, [\addr];
- add \addr, \addr, \post_inc;
- alternative_endif
-
- _asm_extable 8888b,\l;
- .endm
-#else
- .macro uao_ldp l, reg1, reg2, addr, post_inc
- USER(\l, ldp \reg1, \reg2, [\addr], \post_inc)
- .endm
- .macro uao_stp l, reg1, reg2, addr, post_inc
- USER(\l, stp \reg1, \reg2, [\addr], \post_inc)
- .endm
- .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
- USER(\l, \inst \reg, [\addr], \post_inc)
- .endm
-#endif
-
-#endif /* __ASSEMBLY__ */
-
-/*
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
- *
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
- * N.B. If CONFIG_FOO is specified, but not selected, the whole block
- * will be omitted, including oldinstr.
- */
-#define ALTERNATIVE(oldinstr, newinstr, ...) \
- _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
-
+#endif /* __ASSEMBLY__ */
#endif /* __ASM_ALTERNATIVE_H */
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index f68a0e64482a..0fa95d1b311f 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -2,7 +2,7 @@
#ifndef __ASM_ASM_UACCESS_H
#define __ASM_ASM_UACCESS_H
-#include <asm/alternative.h>
+#include <asm/alternative-macros.h>
#include <asm/kernel-pgtable.h>
#include <asm/mmu.h>
#include <asm/sysreg.h>
@@ -58,4 +58,33 @@ alternative_else_nop_endif
.endm
#endif
+/*
+ * Generate the assembly for LDTR/STTR with exception table entries.
+ * This is complicated as there is no post-increment or pair versions of the
+ * unprivileged instructions, and USER() only works for single instructions.
+ */
+ .macro user_ldp l, reg1, reg2, addr, post_inc
+8888: ldtr \reg1, [\addr];
+8889: ldtr \reg2, [\addr, #8];
+ add \addr, \addr, \post_inc;
+
+ _asm_extable 8888b,\l;
+ _asm_extable 8889b,\l;
+ .endm
+
+ .macro user_stp l, reg1, reg2, addr, post_inc
+8888: sttr \reg1, [\addr];
+8889: sttr \reg2, [\addr, #8];
+ add \addr, \addr, \post_inc;
+
+ _asm_extable 8888b,\l;
+ _asm_extable 8889b,\l;
+ .endm
+
+ .macro user_ldst l, inst, reg, addr, post_inc
+8888: \inst \reg, [\addr];
+ add \addr, \addr, \post_inc;
+
+ _asm_extable 8888b,\l;
+ .endm
#endif
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 162539d4c8cd..b77d997b173b 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -16,8 +16,6 @@
#define ARM64_WORKAROUND_CAVIUM_23154 6
#define ARM64_WORKAROUND_834220 7
#define ARM64_HAS_NO_HW_PREFETCH 8
-#define ARM64_HAS_UAO 9
-#define ARM64_ALT_PAN_NOT_UAO 10
#define ARM64_HAS_VIRT_HOST_EXTN 11
#define ARM64_WORKAROUND_CAVIUM_27456 12
#define ARM64_HAS_32BIT_EL0 13
@@ -66,7 +64,9 @@
#define ARM64_HAS_TLB_RANGE 56
#define ARM64_MTE 57
#define ARM64_WORKAROUND_1508412 58
+#define ARM64_HAS_LDAPR 59
+#define ARM64_KVM_PROTECTED_MODE 60
-#define ARM64_NCAPS 59
+#define ARM64_NCAPS 61
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 461277ae7a48..16063c813dcd 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -669,10 +669,16 @@ static __always_inline bool system_supports_fpsimd(void)
return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD);
}
+static inline bool system_uses_hw_pan(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_PAN) &&
+ cpus_have_const_cap(ARM64_HAS_PAN);
+}
+
static inline bool system_uses_ttbr0_pan(void)
{
return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
- !cpus_have_const_cap(ARM64_HAS_PAN);
+ !system_uses_hw_pan();
}
static __always_inline bool system_supports_sve(void)
@@ -769,6 +775,13 @@ static inline bool cpu_has_hw_af(void)
ID_AA64MMFR1_HADBS_SHIFT);
}
+static inline bool cpu_has_pan(void)
+{
+ u64 mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+ return cpuid_feature_extract_unsigned_field(mmfr1,
+ ID_AA64MMFR1_PAN_SHIFT);
+}
+
#ifdef CONFIG_ARM64_AMU_EXTN
/* Check whether the cpu supports the Activity Monitors Unit (AMU) */
extern bool cpu_has_amu_feat(int cpu);
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
new file mode 100644
index 000000000000..a7f5a1bbc8ac
--- /dev/null
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#ifndef __ARM_KVM_INIT_H__
+#define __ARM_KVM_INIT_H__
+
+#ifndef __ASSEMBLY__
+#error Assembly-only header
+#endif
+
+#include <asm/kvm_arm.h>
+#include <asm/ptrace.h>
+#include <asm/sysreg.h>
+#include <linux/irqchip/arm-gic-v3.h>
+
+.macro __init_el2_sctlr
+ mov_q x0, INIT_SCTLR_EL2_MMU_OFF
+ msr sctlr_el2, x0
+ isb
+.endm
+
+/*
+ * Allow Non-secure EL1 and EL0 to access physical timer and counter.
+ * This is not necessary for VHE, since the host kernel runs in EL2,
+ * and EL0 accesses are configured in the later stage of boot process.
+ * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout
+ * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined
+ * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1
+ * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
+ * EL2.
+ */
+.macro __init_el2_timers mode
+.ifeqs "\mode", "nvhe"
+ mrs x0, cnthctl_el2
+ orr x0, x0, #3 // Enable EL1 physical timers
+ msr cnthctl_el2, x0
+.endif
+ msr cntvoff_el2, xzr // Clear virtual offset
+.endm
+
+.macro __init_el2_debug mode
+ mrs x1, id_aa64dfr0_el1
+ sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
+ cmp x0, #1
+ b.lt 1f // Skip if no PMU present
+ mrs x0, pmcr_el0 // Disable debug access traps
+ ubfx x0, x0, #11, #5 // to EL2 and allow access to
+1:
+ csel x2, xzr, x0, lt // all PMU counters from EL1
+
+ /* Statistical profiling */
+ ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
+ cbz x0, 3f // Skip if SPE not present
+
+.ifeqs "\mode", "nvhe"
+ mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2,
+ and x0, x0, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
+ cbnz x0, 2f // then permit sampling of physical
+ mov x0, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
+ 1 << SYS_PMSCR_EL2_PA_SHIFT)
+ msr_s SYS_PMSCR_EL2, x0 // addresses and physical counter
+2:
+ mov x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
+ orr x2, x2, x0 // If we don't have VHE, then
+ // use EL1&0 translation.
+.else
+ orr x2, x2, #MDCR_EL2_TPMS // For VHE, use EL2 translation
+ // and disable access from EL1
+.endif
+
+3:
+ msr mdcr_el2, x2 // Configure debug traps
+.endm
+
+/* LORegions */
+.macro __init_el2_lor
+ mrs x1, id_aa64mmfr1_el1
+ ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
+ cbz x0, 1f
+ msr_s SYS_LORC_EL1, xzr
+1:
+.endm
+
+/* Stage-2 translation */
+.macro __init_el2_stage2
+ msr vttbr_el2, xzr
+.endm
+
+/* GICv3 system register access */
+.macro __init_el2_gicv3
+ mrs x0, id_aa64pfr0_el1
+ ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
+ cbz x0, 1f
+
+ mrs_s x0, SYS_ICC_SRE_EL2
+ orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1
+ orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1
+ msr_s SYS_ICC_SRE_EL2, x0
+ isb // Make sure SRE is now set
+ mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back,
+ tbz x0, #0, 1f // and check that it sticks
+ msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults
+1:
+.endm
+
+.macro __init_el2_hstr
+ msr hstr_el2, xzr // Disable CP15 traps to EL2
+.endm
+
+/* Virtual CPU ID registers */
+.macro __init_el2_nvhe_idregs
+ mrs x0, midr_el1
+ mrs x1, mpidr_el1
+ msr vpidr_el2, x0
+ msr vmpidr_el2, x1
+.endm
+
+/* Coprocessor traps */
+.macro __init_el2_nvhe_cptr
+ mov x0, #0x33ff
+ msr cptr_el2, x0 // Disable copro. traps to EL2
+.endm
+
+/* SVE register access */
+.macro __init_el2_nvhe_sve
+ mrs x1, id_aa64pfr0_el1
+ ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
+ cbz x1, 1f
+
+ bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps
+ msr cptr_el2, x0 // Disable copro. traps to EL2
+ isb
+ mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
+ msr_s SYS_ZCR_EL2, x1 // length for EL1.
+1:
+.endm
+
+.macro __init_el2_nvhe_prepare_eret
+ mov x0, #INIT_PSTATE_EL1
+ msr spsr_el2, x0
+.endm
+
+/**
+ * Initialize EL2 registers to sane values. This should be called early on all
+ * cores that were booted in EL2.
+ *
+ * Regs: x0, x1 and x2 are clobbered.
+ */
+.macro init_el2_state mode
+.ifnes "\mode", "vhe"
+.ifnes "\mode", "nvhe"
+.error "Invalid 'mode' argument"
+.endif
+.endif
+
+ __init_el2_sctlr
+ __init_el2_timers \mode
+ __init_el2_debug \mode
+ __init_el2_lor
+ __init_el2_stage2
+ __init_el2_gicv3
+ __init_el2_hstr
+
+ /*
+ * When VHE is not in use, early init of EL2 needs to be done here.
+ * When VHE _is_ in use, EL1 will not be used in the host and
+ * requires no configuration, and all non-hyp-specific EL2 setup
+ * will be done via the _EL1 system register aliases in __cpu_setup.
+ */
+.ifeqs "\mode", "nvhe"
+ __init_el2_nvhe_idregs
+ __init_el2_nvhe_cptr
+ __init_el2_nvhe_sve
+ __init_el2_nvhe_prepare_eret
+.endif
+.endm
+
+#endif /* __ARM_KVM_INIT_H__ */
diff --git a/arch/arm64/include/asm/exec.h b/arch/arm64/include/asm/exec.h
index 1aae6f9962fc..9a1c22ce664b 100644
--- a/arch/arm64/include/asm/exec.h
+++ b/arch/arm64/include/asm/exec.h
@@ -10,6 +10,5 @@
#include <linux/sched.h>
extern unsigned long arch_align_stack(unsigned long sp);
-void uao_thread_switch(struct task_struct *next);
#endif /* __ASM_EXEC_H */
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 97f6a63810ec..8e41faa37c69 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -16,7 +16,7 @@
do { \
unsigned int loops = FUTEX_MAX_LOOPS; \
\
- uaccess_enable(); \
+ uaccess_enable_privileged(); \
asm volatile( \
" prfm pstl1strm, %2\n" \
"1: ldxr %w1, %2\n" \
@@ -39,7 +39,7 @@ do { \
"+r" (loops) \
: "r" (oparg), "Ir" (-EFAULT), "Ir" (-EAGAIN) \
: "memory"); \
- uaccess_disable(); \
+ uaccess_disable_privileged(); \
} while (0)
static inline int
@@ -95,7 +95,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr,
return -EFAULT;
uaddr = __uaccess_mask_ptr(_uaddr);
- uaccess_enable();
+ uaccess_enable_privileged();
asm volatile("// futex_atomic_cmpxchg_inatomic\n"
" prfm pstl1strm, %2\n"
"1: ldxr %w1, %2\n"
@@ -118,7 +118,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr,
: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops)
: "r" (oldval), "r" (newval), "Ir" (-EFAULT), "Ir" (-EAGAIN)
: "memory");
- uaccess_disable();
+ uaccess_disable_privileged();
if (!ret)
*uval = val;
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 4b39293d0f72..4ebb9c054ccc 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -10,8 +10,7 @@
#include <linux/build_bug.h>
#include <linux/types.h>
-/* A64 instructions are always 32 bits. */
-#define AARCH64_INSN_SIZE 4
+#include <asm/alternative.h>
#ifndef __ASSEMBLY__
/*
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 64ce29378467..4e90c2debf70 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -80,6 +80,7 @@
HCR_FMO | HCR_IMO | HCR_PTW )
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
+#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
/* TCR_EL2 Registers bits */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 4a6a77d8d13e..7ccf770c53d9 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -148,6 +148,14 @@ extern void *__vhe_undefined_symbol;
#endif
+struct kvm_nvhe_init_params {
+ unsigned long mair_el2;
+ unsigned long tcr_el2;
+ unsigned long tpidr_el2;
+ unsigned long stack_hyp_va;
+ phys_addr_t pgd_pa;
+};
+
/* Translate a kernel address @ptr into its equivalent linear mapping */
#define kvm_ksym_ref(ptr) \
({ \
@@ -163,10 +171,8 @@ struct kvm_vcpu;
struct kvm_s2_mmu;
DECLARE_KVM_NVHE_SYM(__kvm_hyp_init);
-DECLARE_KVM_NVHE_SYM(__kvm_hyp_host_vector);
DECLARE_KVM_HYP_SYM(__kvm_hyp_vector);
#define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init)
-#define __kvm_hyp_host_vector CHOOSE_NVHE_SYM(__kvm_hyp_host_vector)
#define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector)
extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 21ce5c420247..11beda85ee7e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -50,6 +50,16 @@
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)
+/*
+ * Mode of operation configurable with kvm-arm.mode early param.
+ * See Documentation/admin-guide/kernel-parameters.txt for more information.
+ */
+enum kvm_mode {
+ KVM_MODE_DEFAULT,
+ KVM_MODE_PROTECTED,
+};
+enum kvm_mode kvm_get_mode(void);
+
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
extern unsigned int kvm_sve_max_vl;
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 6b664de5ec1f..c0450828378b 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -14,6 +14,7 @@
DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DECLARE_PER_CPU(unsigned long, kvm_hyp_vector);
+DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
#define read_sysreg_elx(r,nvh,vh) \
({ \
@@ -92,10 +93,11 @@ void deactivate_traps_vhe_put(void);
u64 __guest_enter(struct kvm_vcpu *vcpu);
+bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt);
+
void __noreturn hyp_panic(void);
#ifdef __KVM_NVHE_HYPERVISOR__
void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
#endif
#endif /* __ARM64_KVM_HYP_H__ */
-
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index e298191a854d..e52d82aeadca 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -94,6 +94,30 @@ alternative_cb_end
sub \reg, \reg, \tmp
.endm
+/*
+ * Convert a kernel image address to a hyp VA
+ * reg: kernel address to be converted in place
+ * tmp: temporary register
+ *
+ * The actual code generation takes place in kvm_get_kimage_voffset, and
+ * the instructions below are only there to reserve the space and
+ * perform the register allocation (kvm_update_kimg_phys_offset uses the
+ * specific registers encoded in the instructions).
+ */
+.macro kimg_hyp_va reg, tmp
+alternative_cb kvm_update_kimg_phys_offset
+ movz \tmp, #0
+ movk \tmp, #0, lsl #16
+ movk \tmp, #0, lsl #32
+ movk \tmp, #0, lsl #48
+alternative_cb_end
+
+ sub \reg, \reg, \tmp
+ mov_q \tmp, PAGE_OFFSET
+ orr \reg, \reg, \tmp
+ kern_hyp_va \reg
+.endm
+
#else
#include <linux/pgtable.h>
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 1599e17379d8..8f1661603b78 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -239,6 +239,12 @@ PERCPU_RET_OP(add, add, ldadd)
#define this_cpu_cmpxchg_8(pcp, o, n) \
_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#ifdef __KVM_NVHE_HYPERVISOR__
+extern unsigned long __hyp_per_cpu_offset(unsigned int cpu);
+#define __per_cpu_offset
+#define per_cpu_offset(cpu) __hyp_per_cpu_offset((cpu))
+#endif
+
#include <asm-generic/percpu.h>
/* Redefine macros for nVHE hyp under DEBUG_PREEMPT to avoid its dependencies. */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index fce8cbecd6bc..724249f37af5 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -8,9 +8,6 @@
#ifndef __ASM_PROCESSOR_H
#define __ASM_PROCESSOR_H
-#define KERNEL_DS UL(-1)
-#define USER_DS ((UL(1) << VA_BITS) - 1)
-
/*
* On arm64 systems, unaligned accesses by the CPU are cheap, and so there is
* no point in shifting all network buffers by 2 bytes just to make some IP
@@ -48,6 +45,7 @@
#define DEFAULT_MAP_WINDOW_64 (UL(1) << VA_BITS_MIN)
#define TASK_SIZE_64 (UL(1) << vabits_actual)
+#define TASK_SIZE_MAX (UL(1) << VA_BITS)
#ifdef CONFIG_COMPAT
#if defined(CONFIG_ARM64_64K_PAGES) && defined(CONFIG_KUSER_HELPERS)
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 997cf8c8cd52..2bb53bc3c326 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -16,6 +16,11 @@
#define CurrentEL_EL1 (1 << 2)
#define CurrentEL_EL2 (2 << 2)
+#define INIT_PSTATE_EL1 \
+ (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h)
+#define INIT_PSTATE_EL2 \
+ (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL2h)
+
/*
* PMR values used to mask/unmask interrupts.
*
@@ -188,8 +193,7 @@ struct pt_regs {
s32 syscallno;
u32 unused2;
#endif
-
- u64 orig_addr_limit;
+ u64 sdei_ttbr1;
/* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */
u64 pmr_save;
u64 stackframe[2];
diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h
new file mode 100644
index 000000000000..1bce62fa908a
--- /dev/null
+++ b/arch/arm64/include/asm/rwonce.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+#ifndef __ASM_RWONCE_H
+#define __ASM_RWONCE_H
+
+#ifdef CONFIG_LTO
+
+#include <linux/compiler_types.h>
+#include <asm/alternative-macros.h>
+
+#ifndef BUILD_VDSO
+
+#ifdef CONFIG_AS_HAS_LDAPR
+#define __LOAD_RCPC(sfx, regs...) \
+ ALTERNATIVE( \
+ "ldar" #sfx "\t" #regs, \
+ ".arch_extension rcpc\n" \
+ "ldapr" #sfx "\t" #regs, \
+ ARM64_HAS_LDAPR)
+#else
+#define __LOAD_RCPC(sfx, regs...) "ldar" #sfx "\t" #regs
+#endif /* CONFIG_AS_HAS_LDAPR */
+
+/*
+ * When building with LTO, there is an increased risk of the compiler
+ * converting an address dependency headed by a READ_ONCE() invocation
+ * into a control dependency and consequently allowing for harmful
+ * reordering by the CPU.
+ *
+ * Ensure that such transformations are harmless by overriding the generic
+ * READ_ONCE() definition with one that provides RCpc acquire semantics
+ * when building with LTO.
+ */
+#define __READ_ONCE(x) \
+({ \
+ typeof(&(x)) __x = &(x); \
+ int atomic = 1; \
+ union { __unqual_scalar_typeof(*__x) __val; char __c[1]; } __u; \
+ switch (sizeof(x)) { \
+ case 1: \
+ asm volatile(__LOAD_RCPC(b, %w0, %1) \
+ : "=r" (*(__u8 *)__u.__c) \
+ : "Q" (*__x) : "memory"); \
+ break; \
+ case 2: \
+ asm volatile(__LOAD_RCPC(h, %w0, %1) \
+ : "=r" (*(__u16 *)__u.__c) \
+ : "Q" (*__x) : "memory"); \
+ break; \
+ case 4: \
+ asm volatile(__LOAD_RCPC(, %w0, %1) \
+ : "=r" (*(__u32 *)__u.__c) \
+ : "Q" (*__x) : "memory"); \
+ break; \
+ case 8: \
+ asm volatile(__LOAD_RCPC(, %0, %1) \
+ : "=r" (*(__u64 *)__u.__c) \
+ : "Q" (*__x) : "memory"); \
+ break; \
+ default: \
+ atomic = 0; \
+ } \
+ atomic ? (typeof(*__x))__u.__val : (*(volatile typeof(__x))__x);\
+})
+
+#endif /* !BUILD_VDSO */
+#endif /* CONFIG_LTO */
+
+#include <asm-generic/rwonce.h>
+
+#endif /* __ASM_RWONCE_H */
diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index 3994169985ef..8ff579361731 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -11,6 +11,7 @@ extern char __alt_instructions[], __alt_instructions_end[];
extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
extern char __hyp_text_start[], __hyp_text_end[];
+extern char __hyp_data_ro_after_init_start[], __hyp_data_ro_after_init_end[];
extern char __idmap_text_start[], __idmap_text_end[];
extern char __initdata_begin[], __initdata_end[];
extern char __inittext_begin[], __inittext_end[];
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 2e7f529ec5a6..bcb01ca15325 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -46,9 +46,9 @@ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
* Logical CPU mapping.
*/
extern u64 __cpu_logical_map[NR_CPUS];
-extern u64 cpu_logical_map(int cpu);
+extern u64 cpu_logical_map(unsigned int cpu);
-static inline void set_cpu_logical_map(int cpu, u64 hwid)
+static inline void set_cpu_logical_map(unsigned int cpu, u64 hwid)
{
__cpu_logical_map[cpu] = hwid;
}
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 500efe405b48..82521cdbfc1c 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -98,6 +98,10 @@
#define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift))
#define SET_PSTATE_TCO(x) __emit_inst(0xd500401f | PSTATE_TCO | ((!!x) << PSTATE_Imm_shift))
+#define set_pstate_pan(x) asm volatile(SET_PSTATE_PAN(x))
+#define set_pstate_uao(x) asm volatile(SET_PSTATE_UAO(x))
+#define set_pstate_ssbs(x) asm volatile(SET_PSTATE_SSBS(x))
+
#define __SYS_BARRIER_INSN(CRm, op2, Rt) \
__emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f))
@@ -583,6 +587,9 @@
#define ENDIAN_SET_EL2 0
#endif
+#define INIT_SCTLR_EL2_MMU_OFF \
+ (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
+
/* SCTLR_EL1 specific flags. */
#define SCTLR_EL1_ATA0 (BIT(42))
@@ -616,12 +623,15 @@
#define ENDIAN_SET_EL1 0
#endif
-#define SCTLR_EL1_SET (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA |\
- SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\
- SCTLR_EL1_DZE | SCTLR_EL1_UCT |\
- SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\
- SCTLR_ELx_ITFSB| SCTLR_ELx_ATA | SCTLR_EL1_ATA0 |\
- ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1)
+#define INIT_SCTLR_EL1_MMU_OFF \
+ (ENDIAN_SET_EL1 | SCTLR_EL1_RES1)
+
+#define INIT_SCTLR_EL1_MMU_ON \
+ (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_EL1_SA0 | \
+ SCTLR_EL1_SED | SCTLR_ELx_I | SCTLR_EL1_DZE | SCTLR_EL1_UCT | \
+ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
+ SCTLR_ELx_ATA | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI | \
+ SCTLR_EL1_RES1)
/* MAIR_ELx memory attributes (used by Linux) */
#define MAIR_ATTR_DEVICE_nGnRnE UL(0x00)
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 1fbab854a51b..015beafe58f5 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -18,14 +18,11 @@ struct task_struct;
#include <asm/stack_pointer.h>
#include <asm/types.h>
-typedef unsigned long mm_segment_t;
-
/*
* low level task data that entry.S needs immediate access to.
*/
struct thread_info {
unsigned long flags; /* low level flags */
- mm_segment_t addr_limit; /* address limit */
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
u64 ttbr0; /* saved TTBR0_EL1 */
#endif
@@ -66,8 +63,7 @@ void arch_release_task_struct(struct task_struct *tsk);
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
#define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */
-#define TIF_FSCHECK 5 /* Check FS is USER_DS on return */
-#define TIF_MTE_ASYNC_FAULT 6 /* MTE Asynchronous Tag Check Fault */
+#define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */
#define TIF_SYSCALL_TRACE 8 /* syscall trace active */
#define TIF_SYSCALL_AUDIT 9 /* syscall auditing */
#define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */
@@ -93,7 +89,6 @@ void arch_release_task_struct(struct task_struct *tsk);
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_UPROBE (1 << TIF_UPROBE)
-#define _TIF_FSCHECK (1 << TIF_FSCHECK)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_SVE (1 << TIF_SVE)
@@ -101,7 +96,7 @@ void arch_release_task_struct(struct task_struct *tsk);
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
- _TIF_UPROBE | _TIF_FSCHECK | _TIF_MTE_ASYNC_FAULT)
+ _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT)
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
@@ -119,7 +114,6 @@ void arch_release_task_struct(struct task_struct *tsk);
{ \
.flags = _TIF_FOREIGN_FPSTATE, \
.preempt_count = INIT_PREEMPT_COUNT, \
- .addr_limit = KERNEL_DS, \
INIT_SCS \
}
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 991dd5f031e4..769cad7b4910 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -24,44 +24,18 @@
#include <asm/memory.h>
#include <asm/extable.h>
-#define get_fs() (current_thread_info()->addr_limit)
-
-static inline void set_fs(mm_segment_t fs)
-{
- current_thread_info()->addr_limit = fs;
-
- /*
- * Prevent a mispredicted conditional call to set_fs from forwarding
- * the wrong address limit to access_ok under speculation.
- */
- spec_bar();
-
- /* On user-mode return, check fs is correct */
- set_thread_flag(TIF_FSCHECK);
-
- /*
- * Enable/disable UAO so that copy_to_user() etc can access
- * kernel memory with the unprivileged instructions.
- */
- if (IS_ENABLED(CONFIG_ARM64_UAO) && fs == KERNEL_DS)
- asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
- else
- asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO,
- CONFIG_ARM64_UAO));
-}
-
-#define uaccess_kernel() (get_fs() == KERNEL_DS)
+#define HAVE_GET_KERNEL_NOFAULT
/*
* Test whether a block of memory is a valid user space address.
* Returns 1 if the range is valid, 0 otherwise.
*
* This is equivalent to the following test:
- * (u65)addr + (u65)size <= (u65)current->addr_limit + 1
+ * (u65)addr + (u65)size <= (u65)TASK_SIZE_MAX
*/
static inline unsigned long __range_ok(const void __user *addr, unsigned long size)
{
- unsigned long ret, limit = current_thread_info()->addr_limit;
+ unsigned long ret, limit = TASK_SIZE_MAX - 1;
/*
* Asynchronous I/O running in a kernel thread does not have the
@@ -94,7 +68,6 @@ static inline unsigned long __range_ok(const void __user *addr, unsigned long si
}
#define access_ok(addr, size) __range_ok(addr, size)
-#define user_addr_max get_fs
#define _ASM_EXTABLE(from, to) \
" .pushsection __ex_table, \"a\"\n" \
@@ -186,47 +159,26 @@ static inline void __uaccess_enable_hw_pan(void)
CONFIG_ARM64_PAN));
}
-#define __uaccess_disable(alt) \
-do { \
- if (!uaccess_ttbr0_disable()) \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), alt, \
- CONFIG_ARM64_PAN)); \
-} while (0)
-
-#define __uaccess_enable(alt) \
-do { \
- if (!uaccess_ttbr0_enable()) \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), alt, \
- CONFIG_ARM64_PAN)); \
-} while (0)
-
-static inline void uaccess_disable(void)
+static inline void uaccess_disable_privileged(void)
{
- __uaccess_disable(ARM64_HAS_PAN);
-}
+ if (uaccess_ttbr0_disable())
+ return;
-static inline void uaccess_enable(void)
-{
- __uaccess_enable(ARM64_HAS_PAN);
+ __uaccess_enable_hw_pan();
}
-/*
- * These functions are no-ops when UAO is present.
- */
-static inline void uaccess_disable_not_uao(void)
+static inline void uaccess_enable_privileged(void)
{
- __uaccess_disable(ARM64_ALT_PAN_NOT_UAO);
-}
+ if (uaccess_ttbr0_enable())
+ return;
-static inline void uaccess_enable_not_uao(void)
-{
- __uaccess_enable(ARM64_ALT_PAN_NOT_UAO);
+ __uaccess_disable_hw_pan();
}
/*
- * Sanitise a uaccess pointer such that it becomes NULL if above the
- * current addr_limit. In case the pointer is tagged (has the top byte set),
- * untag the pointer before checking.
+ * Sanitise a uaccess pointer such that it becomes NULL if above the maximum
+ * user address. In case the pointer is tagged (has the top byte set), untag
+ * the pointer before checking.
*/
#define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr)
static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
@@ -237,7 +189,7 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
" bics xzr, %3, %2\n"
" csel %0, %1, xzr, eq\n"
: "=&r" (safe_ptr)
- : "r" (ptr), "r" (current_thread_info()->addr_limit),
+ : "r" (ptr), "r" (TASK_SIZE_MAX - 1),
"r" (untagged_addr(ptr))
: "cc");
@@ -253,10 +205,9 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
* The "__xxx_error" versions set the third argument to -EFAULT if an error
* occurs, and leave it unchanged on success.
*/
-#define __get_user_asm(instr, alt_instr, reg, x, addr, err, feature) \
+#define __get_mem_asm(load, reg, x, addr, err) \
asm volatile( \
- "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \
- alt_instr " " reg "1, [%2]\n", feature) \
+ "1: " load " " reg "1, [%2]\n" \
"2:\n" \
" .section .fixup, \"ax\"\n" \
" .align 2\n" \
@@ -268,35 +219,36 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
: "+r" (err), "=&r" (x) \
: "r" (addr), "i" (-EFAULT))
-#define __raw_get_user(x, ptr, err) \
+#define __raw_get_mem(ldr, x, ptr, err) \
do { \
unsigned long __gu_val; \
- __chk_user_ptr(ptr); \
- uaccess_enable_not_uao(); \
switch (sizeof(*(ptr))) { \
case 1: \
- __get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \
- (err), ARM64_HAS_UAO); \
+ __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err)); \
break; \
case 2: \
- __get_user_asm("ldrh", "ldtrh", "%w", __gu_val, (ptr), \
- (err), ARM64_HAS_UAO); \
+ __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err)); \
break; \
case 4: \
- __get_user_asm("ldr", "ldtr", "%w", __gu_val, (ptr), \
- (err), ARM64_HAS_UAO); \
+ __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err)); \
break; \
case 8: \
- __get_user_asm("ldr", "ldtr", "%x", __gu_val, (ptr), \
- (err), ARM64_HAS_UAO); \
+ __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err)); \
break; \
default: \
BUILD_BUG(); \
} \
- uaccess_disable_not_uao(); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
} while (0)
+#define __raw_get_user(x, ptr, err) \
+do { \
+ __chk_user_ptr(ptr); \
+ uaccess_ttbr0_enable(); \
+ __raw_get_mem("ldtr", x, ptr, err); \
+ uaccess_ttbr0_disable(); \
+} while (0)
+
#define __get_user_error(x, ptr, err) \
do { \
__typeof__(*(ptr)) __user *__p = (ptr); \
@@ -318,10 +270,19 @@ do { \
#define get_user __get_user
-#define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature) \
+#define __get_kernel_nofault(dst, src, type, err_label) \
+do { \
+ int __gkn_err = 0; \
+ \
+ __raw_get_mem("ldr", *((type *)(dst)), \
+ (__force type *)(src), __gkn_err); \
+ if (unlikely(__gkn_err)) \
+ goto err_label; \
+} while (0)
+
+#define __put_mem_asm(store, reg, x, addr, err) \
asm volatile( \
- "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \
- alt_instr " " reg "1, [%2]\n", feature) \
+ "1: " store " " reg "1, [%2]\n" \
"2:\n" \
" .section .fixup,\"ax\"\n" \
" .align 2\n" \
@@ -332,32 +293,33 @@ do { \
: "+r" (err) \
: "r" (x), "r" (addr), "i" (-EFAULT))
-#define __raw_put_user(x, ptr, err) \
+#define __raw_put_mem(str, x, ptr, err) \
do { \
__typeof__(*(ptr)) __pu_val = (x); \
- __chk_user_ptr(ptr); \
- uaccess_enable_not_uao(); \
switch (sizeof(*(ptr))) { \
case 1: \
- __put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \
- (err), ARM64_HAS_UAO); \
+ __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err)); \
break; \
case 2: \
- __put_user_asm("strh", "sttrh", "%w", __pu_val, (ptr), \
- (err), ARM64_HAS_UAO); \
+ __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err)); \
break; \
case 4: \
- __put_user_asm("str", "sttr", "%w", __pu_val, (ptr), \
- (err), ARM64_HAS_UAO); \
+ __put_mem_asm(str, "%w", __pu_val, (ptr), (err)); \
break; \
case 8: \
- __put_user_asm("str", "sttr", "%x", __pu_val, (ptr), \
- (err), ARM64_HAS_UAO); \
+ __put_mem_asm(str, "%x", __pu_val, (ptr), (err)); \
break; \
default: \
BUILD_BUG(); \
} \
- uaccess_disable_not_uao(); \
+} while (0)
+
+#define __raw_put_user(x, ptr, err) \
+do { \
+ __chk_user_ptr(ptr); \
+ uaccess_ttbr0_enable(); \
+ __raw_put_mem("sttr", x, ptr, err); \
+ uaccess_ttbr0_disable(); \
} while (0)
#define __put_user_error(x, ptr, err) \
@@ -381,14 +343,24 @@ do { \
#define put_user __put_user
+#define __put_kernel_nofault(dst, src, type, err_label) \
+do { \
+ int __pkn_err = 0; \
+ \
+ __raw_put_mem("str", *((type *)(src)), \
+ (__force type *)(dst), __pkn_err); \
+ if (unlikely(__pkn_err)) \
+ goto err_label; \
+} while(0)
+
extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n);
#define raw_copy_from_user(to, from, n) \
({ \
unsigned long __acfu_ret; \
- uaccess_enable_not_uao(); \
+ uaccess_ttbr0_enable(); \
__acfu_ret = __arch_copy_from_user((to), \
__uaccess_mask_ptr(from), (n)); \
- uaccess_disable_not_uao(); \
+ uaccess_ttbr0_disable(); \
__acfu_ret; \
})
@@ -396,10 +368,10 @@ extern unsigned long __must_check __arch_copy_to_user(void __user *to, const voi
#define raw_copy_to_user(to, from, n) \
({ \
unsigned long __actu_ret; \
- uaccess_enable_not_uao(); \
+ uaccess_ttbr0_enable(); \
__actu_ret = __arch_copy_to_user(__uaccess_mask_ptr(to), \
(from), (n)); \
- uaccess_disable_not_uao(); \
+ uaccess_ttbr0_disable(); \
__actu_ret; \
})
@@ -407,10 +379,10 @@ extern unsigned long __must_check __arch_copy_in_user(void __user *to, const voi
#define raw_copy_in_user(to, from, n) \
({ \
unsigned long __aciu_ret; \
- uaccess_enable_not_uao(); \
+ uaccess_ttbr0_enable(); \
__aciu_ret = __arch_copy_in_user(__uaccess_mask_ptr(to), \
__uaccess_mask_ptr(from), (n)); \
- uaccess_disable_not_uao(); \
+ uaccess_ttbr0_disable(); \
__aciu_ret; \
})
@@ -421,9 +393,9 @@ extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned lo
static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n)
{
if (access_ok(to, n)) {
- uaccess_enable_not_uao();
+ uaccess_ttbr0_enable();
n = __arch_clear_user(__uaccess_mask_ptr(to), n);
- uaccess_disable_not_uao();
+ uaccess_ttbr0_disable();
}
return n;
}
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 6069be50baf9..ee6a48df89d9 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -65,9 +65,19 @@ extern u32 __boot_cpu_mode[2];
void __hyp_set_vectors(phys_addr_t phys_vector_base);
void __hyp_reset_vectors(void);
+DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
+
/* Reports the availability of HYP mode */
static inline bool is_hyp_mode_available(void)
{
+ /*
+ * If KVM protected mode is initialized, all CPUs must have been booted
+ * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1.
+ */
+ if (IS_ENABLED(CONFIG_KVM) &&
+ static_branch_likely(&kvm_protected_mode_initialized))
+ return true;
+
return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 &&
__boot_cpu_mode[1] == BOOT_CPU_MODE_EL2);
}
@@ -75,6 +85,14 @@ static inline bool is_hyp_mode_available(void)
/* Check if the bootloader has booted CPUs in different modes */
static inline bool is_hyp_mode_mismatched(void)
{
+ /*
+ * If KVM protected mode is initialized, all CPUs must have been booted
+ * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1.
+ */
+ if (IS_ENABLED(CONFIG_KVM) &&
+ static_branch_likely(&kvm_protected_mode_initialized))
+ return false;
+
return __boot_cpu_mode[0] != __boot_cpu_mode[1];
}
@@ -97,6 +115,14 @@ static __always_inline bool has_vhe(void)
return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN);
}
+static __always_inline bool is_protected_kvm_enabled(void)
+{
+ if (is_vhe_hyp_code())
+ return false;
+ else
+ return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE);
+}
+
#endif /* __ASSEMBLY__ */
#endif /* ! __ASM__VIRT_H */
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 73039949b5ce..a57cffb752e8 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -21,7 +21,8 @@
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset)
-static int all_alternatives_applied;
+/* Volatile, as we may be patching the guts of READ_ONCE() */
+static volatile int all_alternatives_applied;
static DECLARE_BITMAP(applied_alternatives, ARM64_NCAPS);
@@ -205,7 +206,7 @@ static int __apply_alternatives_multi_stop(void *unused)
/* We always have a CPU 0 at this point (__init) */
if (smp_processor_id()) {
- while (!READ_ONCE(all_alternatives_applied))
+ while (!all_alternatives_applied)
cpu_relax();
isb();
} else {
@@ -217,7 +218,7 @@ static int __apply_alternatives_multi_stop(void *unused)
BUG_ON(all_alternatives_applied);
__apply_alternatives(&region, false, remaining_capabilities);
/* Barriers provided by the cache flushing */
- WRITE_ONCE(all_alternatives_applied, 1);
+ all_alternatives_applied = 1;
}
return 0;
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 7364de008bab..0e86e8b9cedd 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -277,7 +277,7 @@ static void __init register_insn_emulation_sysctl(void)
#define __user_swpX_asm(data, addr, res, temp, temp2, B) \
do { \
- uaccess_enable(); \
+ uaccess_enable_privileged(); \
__asm__ __volatile__( \
" mov %w3, %w7\n" \
"0: ldxr"B" %w2, [%4]\n" \
@@ -302,7 +302,7 @@ do { \
"i" (-EFAULT), \
"i" (__SWP_LL_SC_LOOPS) \
: "memory"); \
- uaccess_disable(); \
+ uaccess_disable_privileged(); \
} while (0)
#define __user_swp_asm(data, addr, res, temp, temp2) \
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 7d32fc959b1a..5e82488f1b82 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -30,7 +30,6 @@ int main(void)
BLANK();
DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
- DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
#endif
@@ -70,7 +69,7 @@ int main(void)
DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate));
DEFINE(S_PC, offsetof(struct pt_regs, pc));
DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno));
- DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit));
+ DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1));
DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save));
DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe));
DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
@@ -110,6 +109,11 @@ int main(void)
DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1]));
DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt));
+ DEFINE(NVHE_INIT_MAIR_EL2, offsetof(struct kvm_nvhe_init_params, mair_el2));
+ DEFINE(NVHE_INIT_TCR_EL2, offsetof(struct kvm_nvhe_init_params, tcr_el2));
+ DEFINE(NVHE_INIT_TPIDR_EL2, offsetof(struct kvm_nvhe_init_params, tpidr_el2));
+ DEFINE(NVHE_INIT_STACK_HYP_VA, offsetof(struct kvm_nvhe_init_params, stack_hyp_va));
+ DEFINE(NVHE_INIT_PGD_PA, offsetof(struct kvm_nvhe_init_params, pgd_pa));
#endif
#ifdef CONFIG_CPU_PM
DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 280b10762f6b..d96f4554282d 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -74,6 +74,7 @@
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/fpsimd.h>
+#include <asm/kvm_host.h>
#include <asm/mmu_context.h>
#include <asm/mte.h>
#include <asm/processor.h>
@@ -153,10 +154,6 @@ EXPORT_SYMBOL(cpu_hwcap_keys);
.width = 0, \
}
-/* meta feature for alternatives */
-static bool __maybe_unused
-cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused);
-
static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap);
static bool __system_matches_cap(unsigned int n);
@@ -1600,7 +1597,7 @@ static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
WARN_ON_ONCE(in_interrupt());
sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0);
- asm(SET_PSTATE_PAN(1));
+ set_pstate_pan(1);
}
#endif /* CONFIG_ARM64_PAN */
@@ -1709,6 +1706,21 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
}
#endif /* CONFIG_ARM64_MTE */
+#ifdef CONFIG_KVM
+static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)
+{
+ if (kvm_get_mode() != KVM_MODE_PROTECTED)
+ return false;
+
+ if (is_kernel_in_hyp_mode()) {
+ pr_warn("Protected KVM not available with VHE\n");
+ return false;
+ }
+
+ return true;
+}
+#endif /* CONFIG_KVM */
+
/* Internal helper functions to match cpu capability type */
static bool
cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
@@ -1770,28 +1782,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
.matches = has_no_hw_prefetch,
},
-#ifdef CONFIG_ARM64_UAO
- {
- .desc = "User Access Override",
- .capability = ARM64_HAS_UAO,
- .type = ARM64_CPUCAP_SYSTEM_FEATURE,
- .matches = has_cpuid_feature,
- .sys_reg = SYS_ID_AA64MMFR2_EL1,
- .field_pos = ID_AA64MMFR2_UAO_SHIFT,
- .min_field_value = 1,
- /*
- * We rely on stop_machine() calling uao_thread_switch() to set
- * UAO immediately after patching.
- */
- },
-#endif /* CONFIG_ARM64_UAO */
-#ifdef CONFIG_ARM64_PAN
- {
- .capability = ARM64_ALT_PAN_NOT_UAO,
- .type = ARM64_CPUCAP_SYSTEM_FEATURE,
- .matches = cpufeature_pan_not_uao,
- },
-#endif /* CONFIG_ARM64_PAN */
#ifdef CONFIG_ARM64_VHE
{
.desc = "Virtualization Host Extensions",
@@ -1822,6 +1812,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.field_pos = ID_AA64PFR0_EL1_SHIFT,
.min_field_value = ID_AA64PFR0_EL1_32BIT_64BIT,
},
+ {
+ .desc = "Protected KVM",
+ .capability = ARM64_KVM_PROTECTED_MODE,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = is_kvm_protected_mode,
+ },
#endif
{
.desc = "Kernel page table isolation (KPTI)",
@@ -2138,6 +2134,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.cpu_enable = cpu_enable_mte,
},
#endif /* CONFIG_ARM64_MTE */
+ {
+ .desc = "RCpc load-acquire (LDAPR)",
+ .capability = ARM64_HAS_LDAPR,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .sys_reg = SYS_ID_AA64ISAR1_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64ISAR1_LRCPC_SHIFT,
+ .matches = has_cpuid_feature,
+ .min_field_value = 1,
+ },
{},
};
@@ -2652,7 +2658,7 @@ bool this_cpu_has_cap(unsigned int n)
* - The SYSTEM_FEATURE cpu_hwcaps may not have been set.
* In all other cases cpus_have_{const_}cap() should be used.
*/
-static bool __system_matches_cap(unsigned int n)
+static bool __maybe_unused __system_matches_cap(unsigned int n)
{
if (n < ARM64_NCAPS) {
const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n];
@@ -2732,12 +2738,6 @@ void __init setup_cpu_features(void)
ARCH_DMA_MINALIGN);
}
-static bool __maybe_unused
-cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
-{
- return (__system_matches_cap(ARM64_HAS_PAN) && !__system_matches_cap(ARM64_HAS_UAO));
-}
-
static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
{
cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index b295fb912b12..bdd3b57b12f5 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -216,12 +216,6 @@ alternative_else_nop_endif
.else
add x21, sp, #S_FRAME_SIZE
get_current_task tsk
- /* Save the task's original addr_limit and set USER_DS */
- ldr x20, [tsk, #TSK_TI_ADDR_LIMIT]
- str x20, [sp, #S_ORIG_ADDR_LIMIT]
- mov x20, #USER_DS
- str x20, [tsk, #TSK_TI_ADDR_LIMIT]
- /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */
.endif /* \el == 0 */
mrs x22, elr_el1
mrs x23, spsr_el1
@@ -279,12 +273,6 @@ alternative_else_nop_endif
.macro kernel_exit, el
.if \el != 0
disable_daif
-
- /* Restore the task's original addr_limit. */
- ldr x20, [sp, #S_ORIG_ADDR_LIMIT]
- str x20, [tsk, #TSK_TI_ADDR_LIMIT]
-
- /* No need to restore UAO, it will be restored from SPSR_EL1 */
.endif
/* Restore pmr */
@@ -999,10 +987,9 @@ SYM_CODE_START(__sdei_asm_entry_trampoline)
mov x4, xzr
/*
- * Use reg->interrupted_regs.addr_limit to remember whether to unmap
- * the kernel on exit.
+ * Remember whether to unmap the kernel on exit.
*/
-1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)]
+1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)]
#ifdef CONFIG_RANDOMIZE_BASE
adr x4, tramp_vectors + PAGE_SIZE
@@ -1023,7 +1010,7 @@ NOKPROBE(__sdei_asm_entry_trampoline)
* x4: struct sdei_registered_event argument from registration time.
*/
SYM_CODE_START(__sdei_asm_exit_trampoline)
- ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)]
+ ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)]
cbnz x4, 1f
tramp_unmap_kernel tmp=x4
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index d8d9caf02834..957683029438 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -11,7 +11,6 @@
#include <linux/linkage.h>
#include <linux/init.h>
-#include <linux/irqchip/arm-gic-v3.h>
#include <linux/pgtable.h>
#include <asm/asm_pointer_auth.h>
@@ -21,6 +20,7 @@
#include <asm/asm-offsets.h>
#include <asm/cache.h>
#include <asm/cputype.h>
+#include <asm/el2_setup.h>
#include <asm/elf.h>
#include <asm/image.h>
#include <asm/kernel-pgtable.h>
@@ -104,7 +104,7 @@ pe_header:
*/
SYM_CODE_START(primary_entry)
bl preserve_boot_args
- bl el2_setup // Drop to EL1, w0=cpu_boot_mode
+ bl init_kernel_el // w0=cpu_boot_mode
adrp x23, __PHYS_OFFSET
and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0
bl set_cpu_boot_mode_flag
@@ -482,174 +482,86 @@ EXPORT_SYMBOL(kimage_vaddr)
.section ".idmap.text","awx"
/*
- * If we're fortunate enough to boot at EL2, ensure that the world is
- * sane before dropping to EL1.
+ * Starting from EL2 or EL1, configure the CPU to execute at the highest
+ * reachable EL supported by the kernel in a chosen default state. If dropping
+ * from EL2 to EL1, configure EL2 before configuring EL1.
+ *
+ * Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if
+ * SCTLR_ELx.EOS is clear), we place an ISB prior to ERET.
*
* Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
* booted in EL1 or EL2 respectively.
*/
-SYM_FUNC_START(el2_setup)
- msr SPsel, #1 // We want to use SP_EL{1,2}
+SYM_FUNC_START(init_kernel_el)
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
- b.eq 1f
- mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
+ b.eq init_el2
+
+SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
+ mov_q x0, INIT_SCTLR_EL1_MMU_OFF
msr sctlr_el1, x0
- mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1
isb
- ret
-
-1: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
- msr sctlr_el2, x0
+ mov_q x0, INIT_PSTATE_EL1
+ msr spsr_el1, x0
+ msr elr_el1, lr
+ mov w0, #BOOT_CPU_MODE_EL1
+ eret
+SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
#ifdef CONFIG_ARM64_VHE
/*
- * Check for VHE being present. For the rest of the EL2 setup,
- * x2 being non-zero indicates that we do have VHE, and that the
- * kernel is intended to run at EL2.
+ * Check for VHE being present. x2 being non-zero indicates that we
+ * do have VHE, and that the kernel is intended to run at EL2.
*/
mrs x2, id_aa64mmfr1_el1
ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
#else
mov x2, xzr
#endif
+ cbz x2, init_el2_nvhe
- /* Hyp configuration. */
- mov_q x0, HCR_HOST_NVHE_FLAGS
- cbz x2, set_hcr
+ /*
+ * When VHE _is_ in use, EL1 will not be used in the host and
+ * requires no configuration, and all non-hyp-specific EL2 setup
+ * will be done via the _EL1 system register aliases in __cpu_setup.
+ */
mov_q x0, HCR_HOST_VHE_FLAGS
-set_hcr:
msr hcr_el2, x0
isb
- /*
- * Allow Non-secure EL1 and EL0 to access physical timer and counter.
- * This is not necessary for VHE, since the host kernel runs in EL2,
- * and EL0 accesses are configured in the later stage of boot process.
- * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout
- * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined
- * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1
- * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
- * EL2.
- */
- cbnz x2, 1f
- mrs x0, cnthctl_el2
- orr x0, x0, #3 // Enable EL1 physical timers
- msr cnthctl_el2, x0
-1:
- msr cntvoff_el2, xzr // Clear virtual offset
-
-#ifdef CONFIG_ARM_GIC_V3
- /* GICv3 system register access */
- mrs x0, id_aa64pfr0_el1
- ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
- cbz x0, 3f
-
- mrs_s x0, SYS_ICC_SRE_EL2
- orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1
- orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1
- msr_s SYS_ICC_SRE_EL2, x0
- isb // Make sure SRE is now set
- mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back,
- tbz x0, #0, 3f // and check that it sticks
- msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults
-
-3:
-#endif
-
- /* Populate ID registers. */
- mrs x0, midr_el1
- mrs x1, mpidr_el1
- msr vpidr_el2, x0
- msr vmpidr_el2, x1
-
-#ifdef CONFIG_COMPAT
- msr hstr_el2, xzr // Disable CP15 traps to EL2
-#endif
-
- /* EL2 debug */
- mrs x1, id_aa64dfr0_el1
- sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
- cmp x0, #1
- b.lt 4f // Skip if no PMU present
- mrs x0, pmcr_el0 // Disable debug access traps
- ubfx x0, x0, #11, #5 // to EL2 and allow access to
-4:
- csel x3, xzr, x0, lt // all PMU counters from EL1
-
- /* Statistical profiling */
- ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
- cbz x0, 7f // Skip if SPE not present
- cbnz x2, 6f // VHE?
- mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2,
- and x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
- cbnz x4, 5f // then permit sampling of physical
- mov x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
- 1 << SYS_PMSCR_EL2_PA_SHIFT)
- msr_s SYS_PMSCR_EL2, x4 // addresses and physical counter
-5:
- mov x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
- orr x3, x3, x1 // If we don't have VHE, then
- b 7f // use EL1&0 translation.
-6: // For VHE, use EL2 translation
- orr x3, x3, #MDCR_EL2_TPMS // and disable access from EL1
-7:
- msr mdcr_el2, x3 // Configure debug traps
+ init_el2_state vhe
- /* LORegions */
- mrs x1, id_aa64mmfr1_el1
- ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
- cbz x0, 1f
- msr_s SYS_LORC_EL1, xzr
-1:
-
- /* Stage-2 translation */
- msr vttbr_el2, xzr
-
- cbz x2, install_el2_stub
-
- mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
isb
- ret
-SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL)
+ mov_q x0, INIT_PSTATE_EL2
+ msr spsr_el2, x0
+ msr elr_el2, lr
+ mov w0, #BOOT_CPU_MODE_EL2
+ eret
+
+SYM_INNER_LABEL(init_el2_nvhe, SYM_L_LOCAL)
/*
* When VHE is not in use, early init of EL2 and EL1 needs to be
* done here.
- * When VHE _is_ in use, EL1 will not be used in the host and
- * requires no configuration, and all non-hyp-specific EL2 setup
- * will be done via the _EL1 system register aliases in __cpu_setup.
*/
- mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
+ mov_q x0, INIT_SCTLR_EL1_MMU_OFF
msr sctlr_el1, x0
- /* Coprocessor traps. */
- mov x0, #0x33ff
- msr cptr_el2, x0 // Disable copro. traps to EL2
-
- /* SVE register access */
- mrs x1, id_aa64pfr0_el1
- ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
- cbz x1, 7f
-
- bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps
- msr cptr_el2, x0 // Disable copro. traps to EL2
+ mov_q x0, HCR_HOST_NVHE_FLAGS
+ msr hcr_el2, x0
isb
- mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
- msr_s SYS_ZCR_EL2, x1 // length for EL1.
+
+ init_el2_state nvhe
/* Hypervisor stub */
-7: adr_l x0, __hyp_stub_vectors
+ adr_l x0, __hyp_stub_vectors
msr vbar_el2, x0
+ isb
- /* spsr */
- mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
- PSR_MODE_EL1h)
- msr spsr_el2, x0
msr elr_el2, lr
- mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
+ mov w0, #BOOT_CPU_MODE_EL2
eret
-SYM_FUNC_END(el2_setup)
+SYM_FUNC_END(init_kernel_el)
/*
* Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
@@ -699,7 +611,7 @@ SYM_DATA_END(__early_cpu_boot_status)
* cores are held until we're ready for them to initialise.
*/
SYM_FUNC_START(secondary_holding_pen)
- bl el2_setup // Drop to EL1, w0=cpu_boot_mode
+ bl init_kernel_el // w0=cpu_boot_mode
bl set_cpu_boot_mode_flag
mrs x0, mpidr_el1
mov_q x1, MPIDR_HWID_BITMASK
@@ -717,7 +629,7 @@ SYM_FUNC_END(secondary_holding_pen)
* be used where CPUs are brought online dynamically by the kernel.
*/
SYM_FUNC_START(secondary_entry)
- bl el2_setup // Drop to EL1
+ bl init_kernel_el // w0=cpu_boot_mode
bl set_cpu_boot_mode_flag
b secondary_startup
SYM_FUNC_END(secondary_entry)
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 4b32588918d9..39289d75118d 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -77,9 +77,6 @@ KVM_NVHE_ALIAS(panic);
/* Vectors installed by hyp-init on reset HVC. */
KVM_NVHE_ALIAS(__hyp_stub_vectors);
-/* IDMAP TCR_EL1.T0SZ as computed by the EL1 init code */
-KVM_NVHE_ALIAS(idmap_t0sz);
-
/* Kernel symbol used by icache_is_vpipt(). */
KVM_NVHE_ALIAS(__icache_flags);
@@ -102,6 +99,9 @@ KVM_NVHE_ALIAS(gic_nonsecure_priorities);
KVM_NVHE_ALIAS(__start___kvm_ex_table);
KVM_NVHE_ALIAS(__stop___kvm_ex_table);
+/* Array containing bases of nVHE per-CPU memory regions. */
+KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base);
+
#endif /* CONFIG_KVM */
#endif /* __ARM64_KERNEL_IMAGE_VARS_H */
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index a47a40ec6ad9..71005cb0f4e0 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -422,16 +422,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
if (clone_flags & CLONE_SETTLS)
p->thread.uw.tp_value = tls;
} else {
+ /*
+ * A kthread has no context to ERET to, so ensure any buggy
+ * ERET is treated as an illegal exception return.
+ *
+ * When a user task is created from a kthread, childregs will
+ * be initialized by start_thread() or start_compat_thread().
+ */
memset(childregs, 0, sizeof(struct pt_regs));
- childregs->pstate = PSR_MODE_EL1h;
- if (IS_ENABLED(CONFIG_ARM64_UAO) &&
- cpus_have_const_cap(ARM64_HAS_UAO))
- childregs->pstate |= PSR_UAO_BIT;
-
- spectre_v4_enable_task_mitigation(p);
-
- if (system_uses_irq_prio_masking())
- childregs->pmr_save = GIC_PRIO_IRQON;
+ childregs->pstate = PSR_MODE_EL1h | PSR_IL_BIT;
p->thread.cpu_context.x19 = stack_start;
p->thread.cpu_context.x20 = stk_sz;
@@ -461,17 +460,6 @@ static void tls_thread_switch(struct task_struct *next)
write_sysreg(*task_user_tls(next), tpidr_el0);
}
-/* Restore the UAO state depending on next's addr_limit */
-void uao_thread_switch(struct task_struct *next)
-{
- if (IS_ENABLED(CONFIG_ARM64_UAO)) {
- if (task_thread_info(next)->addr_limit == KERNEL_DS)
- asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
- else
- asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO));
- }
-}
-
/*
* Force SSBS state on context-switch, since it may be lost after migrating
* from a CPU which treats the bit as RES0 in a heterogeneous system.
@@ -554,7 +542,6 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
hw_breakpoint_thread_switch(next);
contextidr_thread_switch(next);
entry_task_switch(next);
- uao_thread_switch(next);
ssbs_thread_switch(next);
erratum_1418040_thread_switch(prev, next);
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index fca03648e270..902e4084c477 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -24,6 +24,7 @@
#include <linux/prctl.h>
#include <linux/sched/task_stack.h>
+#include <asm/insn.h>
#include <asm/spectre.h>
#include <asm/traps.h>
#include <asm/virt.h>
@@ -520,12 +521,12 @@ static enum mitigation_state spectre_v4_enable_hw_mitigation(void)
if (spectre_v4_mitigations_off()) {
sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS);
- asm volatile(SET_PSTATE_SSBS(1));
+ set_pstate_ssbs(1);
return SPECTRE_VULNERABLE;
}
/* SCTLR_EL1.DSSBS was initialised to 0 during boot */
- asm volatile(SET_PSTATE_SSBS(0));
+ set_pstate_ssbs(0);
return SPECTRE_MITIGATED;
}
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 7689f2031c0c..e04b3e90c003 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -178,12 +178,6 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
sdei_api_event_context(i, &regs->regs[i]);
}
- /*
- * We didn't take an exception to get here, set PAN. UAO will be cleared
- * by sdei_event_handler()s force_uaccess_begin() call.
- */
- __uaccess_enable_hw_pan();
-
err = sdei_event_handler(regs, arg);
if (err)
return SDEI_EV_FAILED;
@@ -222,12 +216,39 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
return vbar + 0x480;
}
+static void __kprobes notrace __sdei_pstate_entry(void)
+{
+ /*
+ * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to
+ * whether PSTATE bits are inherited unchanged or generated from
+ * scratch, and the TF-A implementation always clears PAN and always
+ * clears UAO. There are no other known implementations.
+ *
+ * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how
+ * PSTATE is modified upon architectural exceptions, and so PAN is
+ * either inherited or set per SCTLR_ELx.SPAN, and UAO is always
+ * cleared.
+ *
+ * We must explicitly reset PAN to the expected state, including
+ * clearing it when the host isn't using it, in case a VM had it set.
+ */
+ if (system_uses_hw_pan())
+ set_pstate_pan(1);
+ else if (cpu_has_pan())
+ set_pstate_pan(0);
+}
asmlinkage __kprobes notrace unsigned long
__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
{
unsigned long ret;
+ /*
+ * We didn't take an exception to get here, so the HW hasn't
+ * set/cleared bits in PSTATE that we may rely on. Initialize PAN.
+ */
+ __sdei_pstate_entry();
+
nmi_enter();
ret = _sdei_handler(regs, arg);
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 133257ffd859..2f2973bc67c7 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -276,7 +276,7 @@ arch_initcall(reserve_memblock_reserved_regions);
u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
-u64 cpu_logical_map(int cpu)
+u64 cpu_logical_map(unsigned int cpu)
{
return __cpu_logical_map[cpu];
}
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index a8184cad8890..af5c6c6638f7 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -922,9 +922,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
trace_hardirqs_off();
do {
- /* Check valid user FS if needed */
- addr_limit_user_check();
-
if (thread_flags & _TIF_NEED_RESCHED) {
/* Unmask Debug and SError for the next task */
local_daif_restore(DAIF_PROCCTX_NOIRQ);
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index ba40d57757d6..4be7f7eed875 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -99,7 +99,7 @@ SYM_FUNC_END(__cpu_suspend_enter)
.pushsection ".idmap.text", "awx"
SYM_CODE_START(cpu_resume)
- bl el2_setup // if in EL2 drop to EL1 cleanly
+ bl init_kernel_el
bl __cpu_setup
/* enable the MMU early - so we can access sleep_save_stash by va */
adrp x1, swapper_pg_dir
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 96cd347c7a46..a67b37a7a47e 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -58,7 +58,6 @@ void notrace __cpu_suspend_exit(void)
* features that might not have been set correctly.
*/
__uaccess_enable_hw_pan();
- uao_thread_switch(current);
/*
* Restore HW breakpoint registers to sane values
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index d65f52264aba..a8f8e409e2bf 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -28,7 +28,7 @@ ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
$(btildflags-y) -T
ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
-ccflags-y += -DDISABLE_BRANCH_PROFILING
+ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS)
KASAN_SANITIZE := n
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 79280c53b9a6..a1e0f91e6cea 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -48,7 +48,7 @@ cc32-as-instr = $(call try-run,\
# As a result we set our own flags here.
# KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile
-VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
+VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
VDSO_CPPFLAGS += $(LINUXINCLUDE)
# Common C and assembly flags
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 1bda604f4c70..43af13968dfd 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -30,6 +30,13 @@ jiffies = jiffies_64;
*(__kvm_ex_table) \
__stop___kvm_ex_table = .;
+#define HYPERVISOR_DATA_SECTIONS \
+ HYP_SECTION_NAME(.data..ro_after_init) : { \
+ __hyp_data_ro_after_init_start = .; \
+ *(HYP_SECTION_NAME(.data..ro_after_init)) \
+ __hyp_data_ro_after_init_end = .; \
+ }
+
#define HYPERVISOR_PERCPU_SECTION \
. = ALIGN(PAGE_SIZE); \
HYP_SECTION_NAME(.data..percpu) : { \
@@ -37,6 +44,7 @@ jiffies = jiffies_64;
}
#else /* CONFIG_KVM */
#define HYPERVISOR_EXTABLE
+#define HYPERVISOR_DATA_SECTIONS
#define HYPERVISOR_PERCPU_SECTION
#endif
@@ -201,7 +209,7 @@ SECTIONS
INIT_CALLS
CON_INITCALL
INIT_RAM_FS
- *(.init.rodata.* .init.bss) /* from the EFI stub */
+ *(.init.altinstructions .init.rodata.* .init.bss) /* from the EFI stub */
}
.exit.data : {
EXIT_DATA
@@ -234,6 +242,8 @@ SECTIONS
_sdata = .;
RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
+ HYPERVISOR_DATA_SECTIONS
+
/*
* Data written with the MMU off but read with the MMU on requires
* cache lines to be invalidated, discarding up to a Cache Writeback
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 7e86207fa2fc..6e637d2b4cfb 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -19,6 +19,7 @@
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
+#include <linux/psci.h>
#include <trace/events/kvm.h>
#define CREATE_TRACE_POINTS
@@ -45,10 +46,14 @@
__asm__(".arch_extension virt");
#endif
+static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
+DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
+
DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
+DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
/* The VMID used in the VTTBR */
static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
@@ -60,6 +65,10 @@ static bool vgic_present;
static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
+extern u64 kvm_nvhe_sym(__cpu_logical_map)[NR_CPUS];
+extern u32 kvm_nvhe_sym(kvm_host_psci_version);
+extern struct psci_0_1_function_ids kvm_nvhe_sym(kvm_host_psci_0_1_function_ids);
+
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
@@ -1382,11 +1391,9 @@ static int kvm_init_vector_slots(void)
static void cpu_init_hyp_mode(void)
{
- phys_addr_t pgd_ptr;
- unsigned long hyp_stack_ptr;
- unsigned long vector_ptr;
- unsigned long tpidr_el2;
+ struct kvm_nvhe_init_params *params = this_cpu_ptr_nvhe_sym(kvm_init_params);
struct arm_smccc_res res;
+ unsigned long tcr;
/* Switch from the HYP stub to our own HYP init vector */
__hyp_set_vectors(kvm_get_idmap_vector());
@@ -1396,13 +1403,38 @@ static void cpu_init_hyp_mode(void)
* kernel's mapping to the linear mapping, and store it in tpidr_el2
* so that we can use adr_l to access per-cpu variables in EL2.
*/
- tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) -
- (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start));
+ params->tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) -
+ (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start));
+
+ params->mair_el2 = read_sysreg(mair_el1);
+
+ /*
+ * The ID map may be configured to use an extended virtual address
+ * range. This is only the case if system RAM is out of range for the
+ * currently configured page size and VA_BITS, in which case we will
+ * also need the extended virtual range for the HYP ID map, or we won't
+ * be able to enable the EL2 MMU.
+ *
+ * However, at EL2, there is only one TTBR register, and we can't switch
+ * between translation tables *and* update TCR_EL2.T0SZ at the same
+ * time. Bottom line: we need to use the extended range with *both* our
+ * translation tables.
+ *
+ * So use the same T0SZ value we use for the ID map.
+ */
+ tcr = (read_sysreg(tcr_el1) & TCR_EL2_MASK) | TCR_EL2_RES1;
+ tcr &= ~TCR_T0SZ_MASK;
+ tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
+ params->tcr_el2 = tcr;
+
+ params->stack_hyp_va = kern_hyp_va(__this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE);
+ params->pgd_pa = kvm_mmu_get_httbr();
- pgd_ptr = kvm_mmu_get_httbr();
- hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE;
- hyp_stack_ptr = kern_hyp_va(hyp_stack_ptr);
- vector_ptr = (unsigned long)kern_hyp_va(kvm_ksym_ref(__kvm_hyp_host_vector));
+ /*
+ * Flush the init params from the data cache because the struct will
+ * be read while the MMU is off.
+ */
+ kvm_flush_dcache_to_poc(params, sizeof(*params));
/*
* Call initialization code, and switch to the full blown HYP code.
@@ -1411,8 +1443,7 @@ static void cpu_init_hyp_mode(void)
* cpus_have_const_cap() wrapper.
*/
BUG_ON(!system_capabilities_finalized());
- arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init),
- pgd_ptr, tpidr_el2, hyp_stack_ptr, vector_ptr, &res);
+ arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), virt_to_phys(params), &res);
WARN_ON(res.a0 != SMCCC_RET_SUCCESS);
/*
@@ -1501,7 +1532,8 @@ static void _kvm_arch_hardware_disable(void *discard)
void kvm_arch_hardware_disable(void)
{
- _kvm_arch_hardware_disable(NULL);
+ if (!is_protected_kvm_enabled())
+ _kvm_arch_hardware_disable(NULL);
}
#ifdef CONFIG_CPU_PM
@@ -1544,11 +1576,13 @@ static struct notifier_block hyp_init_cpu_pm_nb = {
static void __init hyp_cpu_pm_init(void)
{
- cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
+ if (!is_protected_kvm_enabled())
+ cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
}
static void __init hyp_cpu_pm_exit(void)
{
- cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
+ if (!is_protected_kvm_enabled())
+ cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
}
#else
static inline void hyp_cpu_pm_init(void)
@@ -1559,6 +1593,36 @@ static inline void hyp_cpu_pm_exit(void)
}
#endif
+static void init_cpu_logical_map(void)
+{
+ unsigned int cpu;
+
+ /*
+ * Copy the MPIDR <-> logical CPU ID mapping to hyp.
+ * Only copy the set of online CPUs whose features have been chacked
+ * against the finalized system capabilities. The hypervisor will not
+ * allow any other CPUs from the `possible` set to boot.
+ */
+ for_each_online_cpu(cpu)
+ kvm_nvhe_sym(__cpu_logical_map)[cpu] = cpu_logical_map(cpu);
+}
+
+static bool init_psci_relay(void)
+{
+ /*
+ * If PSCI has not been initialized, protected KVM cannot install
+ * itself on newly booted CPUs.
+ */
+ if (!psci_ops.get_version) {
+ kvm_err("Cannot initialize protected mode without PSCI\n");
+ return false;
+ }
+
+ kvm_nvhe_sym(kvm_host_psci_version) = psci_ops.get_version();
+ kvm_nvhe_sym(kvm_host_psci_0_1_function_ids) = get_psci_0_1_function_ids();
+ return true;
+}
+
static int init_common_resources(void)
{
return kvm_set_ipa_limit();
@@ -1606,7 +1670,8 @@ static int init_subsystems(void)
kvm_sys_reg_table_init();
out:
- on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
+ if (err || !is_protected_kvm_enabled())
+ on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
return err;
}
@@ -1680,6 +1745,14 @@ static int init_hyp_mode(void)
goto out_err;
}
+ err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_ro_after_init_start),
+ kvm_ksym_ref(__hyp_data_ro_after_init_end),
+ PAGE_HYP_RO);
+ if (err) {
+ kvm_err("Cannot map .hyp.data..ro_after_init section\n");
+ goto out_err;
+ }
+
err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
if (err) {
@@ -1723,6 +1796,13 @@ static int init_hyp_mode(void)
}
}
+ if (is_protected_kvm_enabled()) {
+ init_cpu_logical_map();
+
+ if (!init_psci_relay())
+ goto out_err;
+ }
+
return 0;
out_err:
@@ -1847,10 +1927,14 @@ int kvm_arch_init(void *opaque)
if (err)
goto out_hyp;
- if (in_hyp_mode)
+ if (is_protected_kvm_enabled()) {
+ static_branch_enable(&kvm_protected_mode_initialized);
+ kvm_info("Protected nVHE mode initialized successfully\n");
+ } else if (in_hyp_mode) {
kvm_info("VHE mode initialized successfully\n");
- else
+ } else {
kvm_info("Hyp mode initialized successfully\n");
+ }
return 0;
@@ -1868,6 +1952,25 @@ void kvm_arch_exit(void)
kvm_perf_teardown();
}
+static int __init early_kvm_mode_cfg(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
+
+ if (strcmp(arg, "protected") == 0) {
+ kvm_mode = KVM_MODE_PROTECTED;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+early_param("kvm-arm.mode", early_kvm_mode_cfg);
+
+enum kvm_mode kvm_get_mode(void)
+{
+ return kvm_mode;
+}
+
static int arm_init(void)
{
int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
new file mode 100644
index 000000000000..1e6d995968a1
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Trap handler helpers.
+ *
+ * Copyright (C) 2020 - Google LLC
+ * Author: Marc Zyngier <maz@kernel.org>
+ */
+
+#ifndef __ARM64_KVM_NVHE_TRAP_HANDLER_H__
+#define __ARM64_KVM_NVHE_TRAP_HANDLER_H__
+
+#include <asm/kvm_host.h>
+
+#define cpu_reg(ctxt, r) (ctxt)->regs.regs[r]
+#define DECLARE_REG(type, name, ctxt, reg) \
+ type name = (type)cpu_reg(ctxt, (reg))
+
+#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 77b8c4e06f2f..1f1e351c5fe2 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -6,7 +6,8 @@
asflags-y := -D__KVM_NVHE_HYPERVISOR__
ccflags-y := -D__KVM_NVHE_HYPERVISOR__
-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o hyp-main.o
+obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
+ hyp-main.o hyp-smp.o psci-relay.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index fe2740b224cf..a820dfdc9c25 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -39,6 +39,7 @@ SYM_FUNC_START(__host_exit)
bl handle_trap
/* Restore host regs x0-x17 */
+__host_enter_restore_full:
ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)]
ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)]
ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)]
@@ -62,6 +63,14 @@ __host_enter_without_restoring:
SYM_FUNC_END(__host_exit)
/*
+ * void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
+ */
+SYM_FUNC_START(__host_enter)
+ mov x29, x0
+ b __host_enter_restore_full
+SYM_FUNC_END(__host_enter)
+
+/*
* void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
*/
SYM_FUNC_START(__hyp_do_panic)
@@ -180,3 +189,41 @@ SYM_CODE_START(__kvm_hyp_host_vector)
invalid_host_el1_vect // FIQ 32-bit EL1
invalid_host_el1_vect // Error 32-bit EL1
SYM_CODE_END(__kvm_hyp_host_vector)
+
+/*
+ * Forward SMC with arguments in struct kvm_cpu_context, and
+ * store the result into the same struct. Assumes SMCCC 1.2 or older.
+ *
+ * x0: struct kvm_cpu_context*
+ */
+SYM_CODE_START(__kvm_hyp_host_forward_smc)
+ /*
+ * Use x18 to keep the pointer to the host context because
+ * x18 is callee-saved in SMCCC but not in AAPCS64.
+ */
+ mov x18, x0
+
+ ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)]
+ ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)]
+ ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)]
+ ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)]
+ ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)]
+ ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)]
+ ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)]
+ ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)]
+ ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)]
+
+ smc #0
+
+ stp x0, x1, [x18, #CPU_XREG_OFFSET(0)]
+ stp x2, x3, [x18, #CPU_XREG_OFFSET(2)]
+ stp x4, x5, [x18, #CPU_XREG_OFFSET(4)]
+ stp x6, x7, [x18, #CPU_XREG_OFFSET(6)]
+ stp x8, x9, [x18, #CPU_XREG_OFFSET(8)]
+ stp x10, x11, [x18, #CPU_XREG_OFFSET(10)]
+ stp x12, x13, [x18, #CPU_XREG_OFFSET(12)]
+ stp x14, x15, [x18, #CPU_XREG_OFFSET(14)]
+ stp x16, x17, [x18, #CPU_XREG_OFFSET(16)]
+
+ ret
+SYM_CODE_END(__kvm_hyp_host_forward_smc)
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index b11a9d7db677..31b060a44045 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -9,6 +9,7 @@
#include <asm/alternative.h>
#include <asm/assembler.h>
+#include <asm/el2_setup.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
@@ -47,10 +48,7 @@ __invalid:
/*
* x0: SMCCC function ID
- * x1: HYP pgd
- * x2: per-CPU offset
- * x3: HYP stack
- * x4: HYP vectors
+ * x1: struct kvm_nvhe_init_params PA
*/
__do_hyp_init:
/* Check for a stub HVC call */
@@ -71,48 +69,53 @@ __do_hyp_init:
mov x0, #SMCCC_RET_NOT_SUPPORTED
eret
-1:
- /* Set tpidr_el2 for use by HYP to free a register */
- msr tpidr_el2, x2
+1: mov x0, x1
+ mov x4, lr
+ bl ___kvm_hyp_init
+ mov lr, x4
- phys_to_ttbr x0, x1
-alternative_if ARM64_HAS_CNP
- orr x0, x0, #TTBR_CNP_BIT
+ /* Hello, World! */
+ mov x0, #SMCCC_RET_SUCCESS
+ eret
+SYM_CODE_END(__kvm_hyp_init)
+
+/*
+ * Initialize the hypervisor in EL2.
+ *
+ * Only uses x0..x3 so as to not clobber callee-saved SMCCC registers
+ * and leave x4 for the caller.
+ *
+ * x0: struct kvm_nvhe_init_params PA
+ */
+SYM_CODE_START_LOCAL(___kvm_hyp_init)
+alternative_if ARM64_KVM_PROTECTED_MODE
+ mov_q x1, HCR_HOST_NVHE_PROTECTED_FLAGS
+ msr hcr_el2, x1
alternative_else_nop_endif
- msr ttbr0_el2, x0
- mrs x0, tcr_el1
- mov_q x1, TCR_EL2_MASK
- and x0, x0, x1
- mov x1, #TCR_EL2_RES1
- orr x0, x0, x1
+ ldr x1, [x0, #NVHE_INIT_TPIDR_EL2]
+ msr tpidr_el2, x1
- /*
- * The ID map may be configured to use an extended virtual address
- * range. This is only the case if system RAM is out of range for the
- * currently configured page size and VA_BITS, in which case we will
- * also need the extended virtual range for the HYP ID map, or we won't
- * be able to enable the EL2 MMU.
- *
- * However, at EL2, there is only one TTBR register, and we can't switch
- * between translation tables *and* update TCR_EL2.T0SZ at the same
- * time. Bottom line: we need to use the extended range with *both* our
- * translation tables.
- *
- * So use the same T0SZ value we use for the ID map.
- */
- ldr_l x1, idmap_t0sz
- bfi x0, x1, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
+ ldr x1, [x0, #NVHE_INIT_STACK_HYP_VA]
+ mov sp, x1
+
+ ldr x1, [x0, #NVHE_INIT_MAIR_EL2]
+ msr mair_el2, x1
+
+ ldr x1, [x0, #NVHE_INIT_PGD_PA]
+ phys_to_ttbr x2, x1
+alternative_if ARM64_HAS_CNP
+ orr x2, x2, #TTBR_CNP_BIT
+alternative_else_nop_endif
+ msr ttbr0_el2, x2
/*
* Set the PS bits in TCR_EL2.
*/
- tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2
+ ldr x1, [x0, #NVHE_INIT_TCR_EL2]
+ tcr_compute_pa_size x1, #TCR_EL2_PS_SHIFT, x2, x3
+ msr tcr_el2, x1
- msr tcr_el2, x0
-
- mrs x0, mair_el1
- msr mair_el2, x0
isb
/* Invalidate the stale TLBs from Bootloader */
@@ -134,14 +137,70 @@ alternative_else_nop_endif
msr sctlr_el2, x0
isb
- /* Set the stack and new vectors */
- mov sp, x3
- msr vbar_el2, x4
+ /* Set the host vector */
+ ldr x0, =__kvm_hyp_host_vector
+ kimg_hyp_va x0, x1
+ msr vbar_el2, x0
- /* Hello, World! */
- mov x0, #SMCCC_RET_SUCCESS
- eret
-SYM_CODE_END(__kvm_hyp_init)
+ ret
+SYM_CODE_END(___kvm_hyp_init)
+
+/*
+ * PSCI CPU_ON entry point
+ *
+ * x0: struct kvm_nvhe_init_params PA
+ */
+SYM_CODE_START(kvm_hyp_cpu_entry)
+ mov x1, #1 // is_cpu_on = true
+ b __kvm_hyp_init_cpu
+SYM_CODE_END(kvm_hyp_cpu_entry)
+
+/*
+ * PSCI CPU_SUSPEND / SYSTEM_SUSPEND entry point
+ *
+ * x0: struct kvm_nvhe_init_params PA
+ */
+SYM_CODE_START(kvm_hyp_cpu_resume)
+ mov x1, #0 // is_cpu_on = false
+ b __kvm_hyp_init_cpu
+SYM_CODE_END(kvm_hyp_cpu_resume)
+
+/*
+ * Common code for CPU entry points. Initializes EL2 state and
+ * installs the hypervisor before handing over to a C handler.
+ *
+ * x0: struct kvm_nvhe_init_params PA
+ * x1: bool is_cpu_on
+ */
+SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
+ mov x28, x0 // Stash arguments
+ mov x29, x1
+
+ /* Check that the core was booted in EL2. */
+ mrs x0, CurrentEL
+ cmp x0, #CurrentEL_EL2
+ b.eq 2f
+
+ /* The core booted in EL1. KVM cannot be initialized on it. */
+1: wfe
+ wfi
+ b 1b
+
+2: msr SPsel, #1 // We want to use SP_EL{1,2}
+
+ /* Initialize EL2 CPU state to sane values. */
+ init_el2_state nvhe // Clobbers x0..x2
+
+ /* Enable MMU, set vectors and stack. */
+ mov x0, x28
+ bl ___kvm_hyp_init // Clobbers x0..x3
+
+ /* Leave idmap. */
+ mov x0, x29
+ ldr x1, =kvm_host_psci_cpu_entry
+ kimg_hyp_va x1, x2
+ br x1
+SYM_CODE_END(__kvm_hyp_init_cpu)
SYM_CODE_START(__kvm_handle_stub_hvc)
cmp x0, #HVC_SOFT_RESTART
@@ -176,6 +235,11 @@ reset:
msr sctlr_el2, x5
isb
+alternative_if ARM64_KVM_PROTECTED_MODE
+ mov_q x5, HCR_HOST_NVHE_FLAGS
+ msr hcr_el2, x5
+alternative_else_nop_endif
+
/* Install stub vectors */
adr_l x5, __hyp_stub_vectors
msr vbar_el2, x5
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 82df7fc24760..bde658d51404 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -12,9 +12,11 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
-#define cpu_reg(ctxt, r) (ctxt)->regs.regs[r]
-#define DECLARE_REG(type, name, ctxt, reg) \
- type name = (type)cpu_reg(ctxt, (reg))
+#include <nvhe/trap_handler.h>
+
+DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
+
+void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
{
@@ -150,12 +152,43 @@ inval:
cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED;
}
+static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt)
+{
+ __kvm_hyp_host_forward_smc(host_ctxt);
+}
+
+static void skip_host_instruction(void)
+{
+ write_sysreg_el2(read_sysreg_el2(SYS_ELR) + 4, SYS_ELR);
+}
+
+static void handle_host_smc(struct kvm_cpu_context *host_ctxt)
+{
+ bool handled;
+
+ handled = kvm_host_psci_handler(host_ctxt);
+ if (!handled)
+ default_host_smc_handler(host_ctxt);
+
+ /*
+ * Unlike HVC, the return address of an SMC is the instruction's PC.
+ * Move the return address past the instruction.
+ */
+ skip_host_instruction();
+}
+
void handle_trap(struct kvm_cpu_context *host_ctxt)
{
u64 esr = read_sysreg_el2(SYS_ESR);
- if (unlikely(ESR_ELx_EC(esr) != ESR_ELx_EC_HVC64))
+ switch (ESR_ELx_EC(esr)) {
+ case ESR_ELx_EC_HVC64:
+ handle_host_hcall(host_ctxt);
+ break;
+ case ESR_ELx_EC_SMC64:
+ handle_host_smc(host_ctxt);
+ break;
+ default:
hyp_panic();
-
- handle_host_hcall(host_ctxt);
+ }
}
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
new file mode 100644
index 000000000000..cbab0c6246e2
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 - Google LLC
+ * Author: David Brazdil <dbrazdil@google.com>
+ */
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+
+/*
+ * nVHE copy of data structures tracking available CPU cores.
+ * Only entries for CPUs that were online at KVM init are populated.
+ * Other CPUs should not be allowed to boot because their features were
+ * not checked against the finalized system capabilities.
+ */
+u64 __ro_after_init __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
+
+u64 cpu_logical_map(unsigned int cpu)
+{
+ if (cpu >= ARRAY_SIZE(__cpu_logical_map))
+ hyp_panic();
+
+ return __cpu_logical_map[cpu];
+}
+
+unsigned long __hyp_per_cpu_offset(unsigned int cpu)
+{
+ unsigned long *cpu_base_array;
+ unsigned long this_cpu_base;
+ unsigned long elf_base;
+
+ if (cpu >= ARRAY_SIZE(kvm_arm_hyp_percpu_base))
+ hyp_panic();
+
+ cpu_base_array = (unsigned long *)hyp_symbol_addr(kvm_arm_hyp_percpu_base);
+ this_cpu_base = kern_hyp_va(cpu_base_array[cpu]);
+ elf_base = (unsigned long)hyp_symbol_addr(__per_cpu_start);
+ return this_cpu_base - elf_base;
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
index bb2d986ff696..5d76ff2ba63e 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
@@ -16,4 +16,5 @@ SECTIONS {
HYP_SECTION_NAME(.data..percpu) : {
PERCPU_INPUT(L1_CACHE_BYTES)
}
+ HYP_SECTION(.data..ro_after_init)
}
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
new file mode 100644
index 000000000000..08dc9de69314
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 - Google LLC
+ * Author: David Brazdil <dbrazdil@google.com>
+ */
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <kvm/arm_hypercalls.h>
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+#include <linux/psci.h>
+#include <kvm/arm_psci.h>
+#include <uapi/linux/psci.h>
+
+#include <nvhe/trap_handler.h>
+
+void kvm_hyp_cpu_entry(unsigned long r0);
+void kvm_hyp_cpu_resume(unsigned long r0);
+
+void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
+
+/* Config options set by the host. */
+__ro_after_init u32 kvm_host_psci_version;
+__ro_after_init struct psci_0_1_function_ids kvm_host_psci_0_1_function_ids;
+__ro_after_init s64 hyp_physvirt_offset;
+
+#define __hyp_pa(x) ((phys_addr_t)((x)) + hyp_physvirt_offset)
+
+#define INVALID_CPU_ID UINT_MAX
+
+struct psci_boot_args {
+ atomic_t lock;
+ unsigned long pc;
+ unsigned long r0;
+};
+
+#define PSCI_BOOT_ARGS_UNLOCKED 0
+#define PSCI_BOOT_ARGS_LOCKED 1
+
+#define PSCI_BOOT_ARGS_INIT \
+ ((struct psci_boot_args){ \
+ .lock = ATOMIC_INIT(PSCI_BOOT_ARGS_UNLOCKED), \
+ })
+
+static DEFINE_PER_CPU(struct psci_boot_args, cpu_on_args) = PSCI_BOOT_ARGS_INIT;
+static DEFINE_PER_CPU(struct psci_boot_args, suspend_args) = PSCI_BOOT_ARGS_INIT;
+
+static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(u64, func_id, host_ctxt, 0);
+
+ return func_id;
+}
+
+static bool is_psci_0_1_call(u64 func_id)
+{
+ return (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend) ||
+ (func_id == kvm_host_psci_0_1_function_ids.cpu_on) ||
+ (func_id == kvm_host_psci_0_1_function_ids.cpu_off) ||
+ (func_id == kvm_host_psci_0_1_function_ids.migrate);
+}
+
+static bool is_psci_0_2_call(u64 func_id)
+{
+ /* SMCCC reserves IDs 0x00-1F with the given 32/64-bit base for PSCI. */
+ return (PSCI_0_2_FN(0) <= func_id && func_id <= PSCI_0_2_FN(31)) ||
+ (PSCI_0_2_FN64(0) <= func_id && func_id <= PSCI_0_2_FN64(31));
+}
+
+static bool is_psci_call(u64 func_id)
+{
+ switch (kvm_host_psci_version) {
+ case PSCI_VERSION(0, 1):
+ return is_psci_0_1_call(func_id);
+ default:
+ return is_psci_0_2_call(func_id);
+ }
+}
+
+static unsigned long psci_call(unsigned long fn, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_1_1_smc(fn, arg0, arg1, arg2, &res);
+ return res.a0;
+}
+
+static unsigned long psci_forward(struct kvm_cpu_context *host_ctxt)
+{
+ return psci_call(cpu_reg(host_ctxt, 0), cpu_reg(host_ctxt, 1),
+ cpu_reg(host_ctxt, 2), cpu_reg(host_ctxt, 3));
+}
+
+static __noreturn unsigned long psci_forward_noreturn(struct kvm_cpu_context *host_ctxt)
+{
+ psci_forward(host_ctxt);
+ hyp_panic(); /* unreachable */
+}
+
+static unsigned int find_cpu_id(u64 mpidr)
+{
+ unsigned int i;
+
+ /* Reject invalid MPIDRs */
+ if (mpidr & ~MPIDR_HWID_BITMASK)
+ return INVALID_CPU_ID;
+
+ for (i = 0; i < NR_CPUS; i++) {
+ if (cpu_logical_map(i) == mpidr)
+ return i;
+ }
+
+ return INVALID_CPU_ID;
+}
+
+static __always_inline bool try_acquire_boot_args(struct psci_boot_args *args)
+{
+ return atomic_cmpxchg_acquire(&args->lock,
+ PSCI_BOOT_ARGS_UNLOCKED,
+ PSCI_BOOT_ARGS_LOCKED) ==
+ PSCI_BOOT_ARGS_UNLOCKED;
+}
+
+static __always_inline void release_boot_args(struct psci_boot_args *args)
+{
+ atomic_set_release(&args->lock, PSCI_BOOT_ARGS_UNLOCKED);
+}
+
+static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(u64, mpidr, host_ctxt, 1);
+ DECLARE_REG(unsigned long, pc, host_ctxt, 2);
+ DECLARE_REG(unsigned long, r0, host_ctxt, 3);
+
+ unsigned int cpu_id;
+ struct psci_boot_args *boot_args;
+ struct kvm_nvhe_init_params *init_params;
+ int ret;
+
+ /*
+ * Find the logical CPU ID for the given MPIDR. The search set is
+ * the set of CPUs that were online at the point of KVM initialization.
+ * Booting other CPUs is rejected because their cpufeatures were not
+ * checked against the finalized capabilities. This could be relaxed
+ * by doing the feature checks in hyp.
+ */
+ cpu_id = find_cpu_id(mpidr);
+ if (cpu_id == INVALID_CPU_ID)
+ return PSCI_RET_INVALID_PARAMS;
+
+ boot_args = per_cpu_ptr(hyp_symbol_addr(cpu_on_args), cpu_id);
+ init_params = per_cpu_ptr(hyp_symbol_addr(kvm_init_params), cpu_id);
+
+ /* Check if the target CPU is already being booted. */
+ if (!try_acquire_boot_args(boot_args))
+ return PSCI_RET_ALREADY_ON;
+
+ boot_args->pc = pc;
+ boot_args->r0 = r0;
+ wmb();
+
+ ret = psci_call(func_id, mpidr,
+ __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_entry)),
+ __hyp_pa(init_params));
+
+ /* If successful, the lock will be released by the target CPU. */
+ if (ret != PSCI_RET_SUCCESS)
+ release_boot_args(boot_args);
+
+ return ret;
+}
+
+static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(u64, power_state, host_ctxt, 1);
+ DECLARE_REG(unsigned long, pc, host_ctxt, 2);
+ DECLARE_REG(unsigned long, r0, host_ctxt, 3);
+
+ struct psci_boot_args *boot_args;
+ struct kvm_nvhe_init_params *init_params;
+
+ boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args));
+ init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params));
+
+ /*
+ * No need to acquire a lock before writing to boot_args because a core
+ * can only suspend itself. Racy CPU_ON calls use a separate struct.
+ */
+ boot_args->pc = pc;
+ boot_args->r0 = r0;
+
+ /*
+ * Will either return if shallow sleep state, or wake up into the entry
+ * point if it is a deep sleep state.
+ */
+ return psci_call(func_id, power_state,
+ __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)),
+ __hyp_pa(init_params));
+}
+
+static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(unsigned long, pc, host_ctxt, 1);
+ DECLARE_REG(unsigned long, r0, host_ctxt, 2);
+
+ struct psci_boot_args *boot_args;
+ struct kvm_nvhe_init_params *init_params;
+
+ boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args));
+ init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params));
+
+ /*
+ * No need to acquire a lock before writing to boot_args because a core
+ * can only suspend itself. Racy CPU_ON calls use a separate struct.
+ */
+ boot_args->pc = pc;
+ boot_args->r0 = r0;
+
+ /* Will only return on error. */
+ return psci_call(func_id,
+ __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)),
+ __hyp_pa(init_params), 0);
+}
+
+asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on)
+{
+ struct psci_boot_args *boot_args;
+ struct kvm_cpu_context *host_ctxt;
+
+ host_ctxt = &this_cpu_ptr(hyp_symbol_addr(kvm_host_data))->host_ctxt;
+
+ if (is_cpu_on)
+ boot_args = this_cpu_ptr(hyp_symbol_addr(cpu_on_args));
+ else
+ boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args));
+
+ cpu_reg(host_ctxt, 0) = boot_args->r0;
+ write_sysreg_el2(boot_args->pc, SYS_ELR);
+
+ if (is_cpu_on)
+ release_boot_args(boot_args);
+
+ __host_enter(host_ctxt);
+}
+
+static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt)
+{
+ if ((func_id == kvm_host_psci_0_1_function_ids.cpu_off) ||
+ (func_id == kvm_host_psci_0_1_function_ids.migrate))
+ return psci_forward(host_ctxt);
+ else if (func_id == kvm_host_psci_0_1_function_ids.cpu_on)
+ return psci_cpu_on(func_id, host_ctxt);
+ else if (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend)
+ return psci_cpu_suspend(func_id, host_ctxt);
+ else
+ return PSCI_RET_NOT_SUPPORTED;
+}
+
+static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ctxt)
+{
+ switch (func_id) {
+ case PSCI_0_2_FN_PSCI_VERSION:
+ case PSCI_0_2_FN_CPU_OFF:
+ case PSCI_0_2_FN64_AFFINITY_INFO:
+ case PSCI_0_2_FN64_MIGRATE:
+ case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+ case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
+ return psci_forward(host_ctxt);
+ case PSCI_0_2_FN_SYSTEM_OFF:
+ case PSCI_0_2_FN_SYSTEM_RESET:
+ psci_forward_noreturn(host_ctxt);
+ unreachable();
+ case PSCI_0_2_FN64_CPU_SUSPEND:
+ return psci_cpu_suspend(func_id, host_ctxt);
+ case PSCI_0_2_FN64_CPU_ON:
+ return psci_cpu_on(func_id, host_ctxt);
+ default:
+ return PSCI_RET_NOT_SUPPORTED;
+ }
+}
+
+static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ctxt)
+{
+ switch (func_id) {
+ case PSCI_1_0_FN_PSCI_FEATURES:
+ case PSCI_1_0_FN_SET_SUSPEND_MODE:
+ case PSCI_1_1_FN64_SYSTEM_RESET2:
+ return psci_forward(host_ctxt);
+ case PSCI_1_0_FN64_SYSTEM_SUSPEND:
+ return psci_system_suspend(func_id, host_ctxt);
+ default:
+ return psci_0_2_handler(func_id, host_ctxt);
+ }
+}
+
+bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt)
+{
+ u64 func_id = get_psci_func_id(host_ctxt);
+ unsigned long ret;
+
+ if (!is_psci_call(func_id))
+ return false;
+
+ switch (kvm_host_psci_version) {
+ case PSCI_VERSION(0, 1):
+ ret = psci_0_1_handler(func_id, host_ctxt);
+ break;
+ case PSCI_VERSION(0, 2):
+ ret = psci_0_2_handler(func_id, host_ctxt);
+ break;
+ default:
+ ret = psci_1_0_handler(func_id, host_ctxt);
+ break;
+ }
+
+ cpu_reg(host_ctxt, 0) = ret;
+ cpu_reg(host_ctxt, 1) = 0;
+ cpu_reg(host_ctxt, 2) = 0;
+ cpu_reg(host_ctxt, 3) = 0;
+ return true;
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 3e50ff35aa4f..f3d0e9eca56c 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -97,7 +97,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
write_sysreg(mdcr_el2, mdcr_el2);
- write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
+ if (is_protected_kvm_enabled())
+ write_sysreg(HCR_HOST_NVHE_PROTECTED_FLAGS, hcr_el2);
+ else
+ write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
write_sysreg(__kvm_hyp_host_vector, vbar_el2);
}
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index 4130b72e6891..d8cc51bd60bf 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -24,6 +24,30 @@ static u64 tag_val;
static u64 va_mask;
/*
+ * Compute HYP VA by using the same computation as kern_hyp_va().
+ */
+static u64 __early_kern_hyp_va(u64 addr)
+{
+ addr &= va_mask;
+ addr |= tag_val << tag_lsb;
+ return addr;
+}
+
+/*
+ * Store a hyp VA <-> PA offset into a hyp-owned variable.
+ */
+static void init_hyp_physvirt_offset(void)
+{
+ extern s64 kvm_nvhe_sym(hyp_physvirt_offset);
+ u64 kern_va, hyp_va;
+
+ /* Compute the offset from the hyp VA and PA of a random symbol. */
+ kern_va = (u64)kvm_ksym_ref(__hyp_text_start);
+ hyp_va = __early_kern_hyp_va(kern_va);
+ CHOOSE_NVHE_SYM(hyp_physvirt_offset) = (s64)__pa(kern_va) - (s64)hyp_va;
+}
+
+/*
* We want to generate a hyp VA with the following format (with V ==
* vabits_actual):
*
@@ -54,6 +78,8 @@ __init void kvm_compute_layout(void)
tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb);
}
tag_val >>= tag_lsb;
+
+ init_hyp_physvirt_offset();
}
static u32 compute_instruction(int n, u32 rd, u32 rn)
@@ -146,9 +172,7 @@ void kvm_patch_vector_branch(struct alt_instr *alt,
/*
* Compute HYP VA by using the same computation as kern_hyp_va()
*/
- addr = (uintptr_t)kvm_ksym_ref(__kvm_hyp_vector);
- addr &= va_mask;
- addr |= tag_val << tag_lsb;
+ addr = __early_kern_hyp_va((u64)kvm_ksym_ref(__kvm_hyp_vector));
/* Use PC[10:7] to branch to the same vector in KVM */
addr |= ((u64)origptr & GENMASK_ULL(10, 7));
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index 48a3a26eff66..af9afcbec92c 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -24,20 +24,20 @@ SYM_FUNC_START(__arch_clear_user)
subs x1, x1, #8
b.mi 2f
1:
-uao_user_alternative 9f, str, sttr, xzr, x0, 8
+user_ldst 9f, sttr, xzr, x0, 8
subs x1, x1, #8
b.pl 1b
2: adds x1, x1, #4
b.mi 3f
-uao_user_alternative 9f, str, sttr, wzr, x0, 4
+user_ldst 9f, sttr, wzr, x0, 4
sub x1, x1, #4
3: adds x1, x1, #2
b.mi 4f
-uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
+user_ldst 9f, sttrh, wzr, x0, 2
sub x1, x1, #2
4: adds x1, x1, #1
b.mi 5f
-uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
+user_ldst 9f, sttrb, wzr, x0, 0
5: mov x0, #0
ret
SYM_FUNC_END(__arch_clear_user)
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 0f8a3a9e3795..95cd62d67371 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -21,7 +21,7 @@
*/
.macro ldrb1 reg, ptr, val
- uao_user_alternative 9998f, ldrb, ldtrb, \reg, \ptr, \val
+ user_ldst 9998f, ldtrb, \reg, \ptr, \val
.endm
.macro strb1 reg, ptr, val
@@ -29,7 +29,7 @@
.endm
.macro ldrh1 reg, ptr, val
- uao_user_alternative 9998f, ldrh, ldtrh, \reg, \ptr, \val
+ user_ldst 9998f, ldtrh, \reg, \ptr, \val
.endm
.macro strh1 reg, ptr, val
@@ -37,7 +37,7 @@
.endm
.macro ldr1 reg, ptr, val
- uao_user_alternative 9998f, ldr, ldtr, \reg, \ptr, \val
+ user_ldst 9998f, ldtr, \reg, \ptr, \val
.endm
.macro str1 reg, ptr, val
@@ -45,7 +45,7 @@
.endm
.macro ldp1 reg1, reg2, ptr, val
- uao_ldp 9998f, \reg1, \reg2, \ptr, \val
+ user_ldp 9998f, \reg1, \reg2, \ptr, \val
.endm
.macro stp1 reg1, reg2, ptr, val
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index 80e37ada0ee1..1f61cd0df062 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -22,35 +22,35 @@
* x0 - bytes not copied
*/
.macro ldrb1 reg, ptr, val
- uao_user_alternative 9998f, ldrb, ldtrb, \reg, \ptr, \val
+ user_ldst 9998f, ldtrb, \reg, \ptr, \val
.endm
.macro strb1 reg, ptr, val
- uao_user_alternative 9998f, strb, sttrb, \reg, \ptr, \val
+ user_ldst 9998f, sttrb, \reg, \ptr, \val
.endm
.macro ldrh1 reg, ptr, val
- uao_user_alternative 9998f, ldrh, ldtrh, \reg, \ptr, \val
+ user_ldst 9998f, ldtrh, \reg, \ptr, \val
.endm
.macro strh1 reg, ptr, val
- uao_user_alternative 9998f, strh, sttrh, \reg, \ptr, \val
+ user_ldst 9998f, sttrh, \reg, \ptr, \val
.endm
.macro ldr1 reg, ptr, val
- uao_user_alternative 9998f, ldr, ldtr, \reg, \ptr, \val
+ user_ldst 9998f, ldtr, \reg, \ptr, \val
.endm
.macro str1 reg, ptr, val
- uao_user_alternative 9998f, str, sttr, \reg, \ptr, \val
+ user_ldst 9998f, sttr, \reg, \ptr, \val
.endm
.macro ldp1 reg1, reg2, ptr, val
- uao_ldp 9998f, \reg1, \reg2, \ptr, \val
+ user_ldp 9998f, \reg1, \reg2, \ptr, \val
.endm
.macro stp1 reg1, reg2, ptr, val
- uao_stp 9998f, \reg1, \reg2, \ptr, \val
+ user_stp 9998f, \reg1, \reg2, \ptr, \val
.endm
end .req x5
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 4ec59704b8f2..043da90f5dd7 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -24,7 +24,7 @@
.endm
.macro strb1 reg, ptr, val
- uao_user_alternative 9998f, strb, sttrb, \reg, \ptr, \val
+ user_ldst 9998f, sttrb, \reg, \ptr, \val
.endm
.macro ldrh1 reg, ptr, val
@@ -32,7 +32,7 @@
.endm
.macro strh1 reg, ptr, val
- uao_user_alternative 9998f, strh, sttrh, \reg, \ptr, \val
+ user_ldst 9998f, sttrh, \reg, \ptr, \val
.endm
.macro ldr1 reg, ptr, val
@@ -40,7 +40,7 @@
.endm
.macro str1 reg, ptr, val
- uao_user_alternative 9998f, str, sttr, \reg, \ptr, \val
+ user_ldst 9998f, sttr, \reg, \ptr, \val
.endm
.macro ldp1 reg1, reg2, ptr, val
@@ -48,7 +48,7 @@
.endm
.macro stp1 reg1, reg2, ptr, val
- uao_stp 9998f, \reg1, \reg2, \ptr, \val
+ user_stp 9998f, \reg1, \reg2, \ptr, \val
.endm
end .req x5
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
index 03ca6d8b8670..351537c12f36 100644
--- a/arch/arm64/lib/mte.S
+++ b/arch/arm64/lib/mte.S
@@ -4,7 +4,7 @@
*/
#include <linux/linkage.h>
-#include <asm/alternative.h>
+#include <asm/asm-uaccess.h>
#include <asm/assembler.h>
#include <asm/mte.h>
#include <asm/page.h>
@@ -67,7 +67,7 @@ SYM_FUNC_START(mte_copy_tags_from_user)
mov x3, x1
cbz x2, 2f
1:
- uao_user_alternative 2f, ldrb, ldtrb, w4, x1, 0
+ user_ldst 2f, ldtrb, w4, x1, 0
lsl x4, x4, #MTE_TAG_SHIFT
stg x4, [x0], #MTE_GRANULE_SIZE
add x1, x1, #1
@@ -94,7 +94,7 @@ SYM_FUNC_START(mte_copy_tags_to_user)
1:
ldg x4, [x1]
ubfx x4, x4, #MTE_TAG_SHIFT, #MTE_TAG_SIZE
- uao_user_alternative 2f, strb, sttrb, w4, x0, 0
+ user_ldst 2f, sttrb, w4, x0, 0
add x0, x0, #1
add x1, x1, #MTE_GRANULE_SIZE
subs x2, x2, #1
diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c
index bfa30b75b2b8..c83bb5a4aad2 100644
--- a/arch/arm64/lib/uaccess_flushcache.c
+++ b/arch/arm64/lib/uaccess_flushcache.c
@@ -30,9 +30,7 @@ unsigned long __copy_user_flushcache(void *to, const void __user *from,
{
unsigned long rc;
- uaccess_enable_not_uao();
- rc = __arch_copy_from_user(to, from, n);
- uaccess_disable_not_uao();
+ rc = raw_copy_from_user(to, from, n);
/* See above */
__clean_dcache_area_pop(to, n - rc);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 1ee94002801f..8dc17e650c8e 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -479,11 +479,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
}
if (is_ttbr0_addr(addr) && is_el1_permission_fault(addr, esr, regs)) {
- /* regs->orig_addr_limit may be 0 if we entered from EL0 */
- if (regs->orig_addr_limit == KERNEL_DS)
- die_kernel_fault("access to user memory with fs=KERNEL_DS",
- addr, esr, regs);
-
if (is_el1_instruction_abort(esr))
die_kernel_fault("execution of user memory",
addr, esr, regs);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 23c326a06b2d..29f064e117d9 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -489,6 +489,6 @@ SYM_FUNC_START(__cpu_setup)
/*
* Prepare SCTLR
*/
- mov_q x0, SCTLR_EL1_SET
+ mov_q x0, INIT_SCTLR_EL1_MMU_ON
ret // return to head.S
SYM_FUNC_END(__cpu_setup)

Privacy Policy