aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c2
-rw-r--r--arch/x86/kernel/jump_label.c7
-rw-r--r--arch/x86/kvm/cpuid.c30
-rw-r--r--arch/x86/kvm/ioapic.c2
-rw-r--r--arch/x86/kvm/ioapic.h4
-rw-r--r--arch/x86/kvm/mmu/mmu.c2
-rw-r--r--arch/x86/kvm/mmu/paging.h14
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h4
-rw-r--r--arch/x86/kvm/mmu/spte.h6
-rw-r--r--arch/x86/kvm/svm/avic.c2
-rw-r--r--arch/x86/kvm/svm/nested.c57
-rw-r--r--arch/x86/kvm/svm/sev.c14
-rw-r--r--arch/x86/kvm/svm/svm.c95
-rw-r--r--arch/x86/kvm/svm/svm.h7
-rw-r--r--arch/x86/kvm/svm/svm_onhyperv.h2
-rw-r--r--arch/x86/kvm/vmx/vmx.h2
-rw-r--r--arch/x86/kvm/x86.c9
-rw-r--r--arch/x86/mm/pgtable.c34
-rw-r--r--arch/x86/net/bpf_jit_comp.c10
-rw-r--r--arch/x86/net/bpf_jit_comp32.c6
21 files changed, 218 insertions, 92 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 49270655e827..88fb922c23a0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -202,7 +202,6 @@ config X86
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT
- select HAVE_IDE
select HAVE_IOREMAP_PROT
select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64
select HAVE_IRQ_TIME_ACCOUNTING
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index cc8f1773deca..c890d67a64ad 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -237,7 +237,7 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
for_each_present_cpu(i) {
if (i == 0)
continue;
- ret = hv_call_add_logical_proc(numa_cpu_node(i), i, i);
+ ret = hv_call_add_logical_proc(numa_cpu_node(i), i, cpu_physical_id(i));
BUG_ON(ret);
}
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 674906fad43b..68f091ba8443 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -79,9 +79,10 @@ __jump_label_patch(struct jump_entry *entry, enum jump_label_type type)
return (struct jump_label_patch){.code = code, .size = size};
}
-static inline void __jump_label_transform(struct jump_entry *entry,
- enum jump_label_type type,
- int init)
+static __always_inline void
+__jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type,
+ int init)
{
const struct jump_label_patch jlp = __jump_label_patch(entry, type);
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index c42613cfb5ba..739be5da3bca 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -765,7 +765,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS);
edx.split.bit_width_fixed = cap.bit_width_fixed;
- edx.split.anythread_deprecated = 1;
+ if (cap.version)
+ edx.split.anythread_deprecated = 1;
edx.split.reserved1 = 0;
edx.split.reserved2 = 0;
@@ -940,8 +941,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
unsigned phys_as = entry->eax & 0xff;
- if (!g_phys_as)
+ /*
+ * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as
+ * the guest operates in the same PA space as the host, i.e.
+ * reductions in MAXPHYADDR for memory encryption affect shadow
+ * paging, too.
+ *
+ * If TDP is enabled but an explicit guest MAXPHYADDR is not
+ * provided, use the raw bare metal MAXPHYADDR as reductions to
+ * the HPAs do not affect GPAs.
+ */
+ if (!tdp_enabled)
+ g_phys_as = boot_cpu_data.x86_phys_bits;
+ else if (!g_phys_as)
g_phys_as = phys_as;
+
entry->eax = g_phys_as | (virt_as << 8);
entry->edx = 0;
cpuid_entry_override(entry, CPUID_8000_0008_EBX);
@@ -964,12 +978,18 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
case 0x8000001a:
case 0x8000001e:
break;
- /* Support memory encryption cpuid if host supports it */
case 0x8000001F:
- if (!kvm_cpu_cap_has(X86_FEATURE_SEV))
+ if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) {
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
- else
+ } else {
cpuid_entry_override(entry, CPUID_8000_001F_EAX);
+
+ /*
+ * Enumerate '0' for "PA bits reduction", the adjusted
+ * MAXPHYADDR is enumerated directly (see 0x80000008).
+ */
+ entry->ebx &= ~GENMASK(11, 6);
+ }
break;
/*Add support for Centaur's CPUID instruction*/
case 0xC0000000:
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 698969e18fe3..ff005fe738a4 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -96,7 +96,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
{
ioapic->rtc_status.pending_eoi = 0;
- bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID);
+ bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID + 1);
}
static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 660401700075..11e4065e1617 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -43,13 +43,13 @@ struct kvm_vcpu;
struct dest_map {
/* vcpu bitmap where IRQ has been sent */
- DECLARE_BITMAP(map, KVM_MAX_VCPU_ID);
+ DECLARE_BITMAP(map, KVM_MAX_VCPU_ID + 1);
/*
* Vector sent to a given vcpu, only valid when
* the vcpu's bit in map is set
*/
- u8 vectors[KVM_MAX_VCPU_ID];
+ u8 vectors[KVM_MAX_VCPU_ID + 1];
};
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 845d114ae075..66f7f5bc3482 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -53,6 +53,8 @@
#include <asm/kvm_page_track.h>
#include "trace.h"
+#include "paging.h"
+
extern bool itlb_multihit_kvm_mitigation;
int __read_mostly nx_huge_pages = -1;
diff --git a/arch/x86/kvm/mmu/paging.h b/arch/x86/kvm/mmu/paging.h
new file mode 100644
index 000000000000..de8ab323bb70
--- /dev/null
+++ b/arch/x86/kvm/mmu/paging.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Shadow paging constants/helpers that don't need to be #undef'd. */
+#ifndef __KVM_X86_PAGING_H
+#define __KVM_X86_PAGING_H
+
+#define GUEST_PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
+#define PT64_LVL_ADDR_MASK(level) \
+ (GUEST_PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
+ * PT64_LEVEL_BITS))) - 1))
+#define PT64_LVL_OFFSET_MASK(level) \
+ (GUEST_PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
+ * PT64_LEVEL_BITS))) - 1))
+#endif /* __KVM_X86_PAGING_H */
+
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 490a028ddabe..ee044d357b5f 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -24,7 +24,7 @@
#define pt_element_t u64
#define guest_walker guest_walker64
#define FNAME(name) paging##64_##name
- #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
+ #define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK
#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
@@ -57,7 +57,7 @@
#define pt_element_t u64
#define guest_walker guest_walkerEPT
#define FNAME(name) ept_##name
- #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
+ #define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK
#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 7a5ce9314107..eb7b227fc6cf 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -38,12 +38,6 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK == 0);
#else
#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
#endif
-#define PT64_LVL_ADDR_MASK(level) \
- (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
- * PT64_LEVEL_BITS))) - 1))
-#define PT64_LVL_OFFSET_MASK(level) \
- (PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
- * PT64_LEVEL_BITS))) - 1))
#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \
| shadow_x_mask | shadow_nx_mask | shadow_me_mask)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 1d01da64c333..a8ad78a2faa1 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -646,7 +646,7 @@ out:
void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb *vmcb = svm->vmcb;
+ struct vmcb *vmcb = svm->vmcb01.ptr;
bool activated = kvm_vcpu_apicv_active(vcpu);
if (!enable_apicv)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 21d03e3a5dfd..61738ff8ef33 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -154,6 +154,10 @@ void recalc_intercepts(struct vcpu_svm *svm)
for (i = 0; i < MAX_INTERCEPT; i++)
c->intercepts[i] |= g->intercepts[i];
+
+ /* If SMI is not intercepted, ignore guest SMI intercept as well */
+ if (!intercept_smi)
+ vmcb_clr_intercept(c, INTERCEPT_SMI);
}
static void copy_vmcb_control_area(struct vmcb_control_area *dst,
@@ -304,8 +308,8 @@ static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
return true;
}
-static void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
- struct vmcb_control_area *control)
+void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
+ struct vmcb_control_area *control)
{
copy_vmcb_control_area(&svm->nested.ctl, control);
@@ -511,7 +515,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
* Also covers avic_vapic_bar, avic_backing_page, avic_logical_id,
* avic_physical_id.
*/
- WARN_ON(svm->vmcb01.ptr->control.int_ctl & AVIC_ENABLE_MASK);
+ WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
/* Copied from vmcb01. msrpm_base can be overwritten later. */
svm->vmcb->control.nested_ctl = svm->vmcb01.ptr->control.nested_ctl;
@@ -618,6 +622,11 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
struct kvm_host_map map;
u64 vmcb12_gpa;
+ if (!svm->nested.hsave_msr) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+
if (is_smm(vcpu)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
@@ -692,7 +701,28 @@ out:
return ret;
}
-void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
+/* Copy state save area fields which are handled by VMRUN */
+void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
+ struct vmcb_save_area *from_save)
+{
+ to_save->es = from_save->es;
+ to_save->cs = from_save->cs;
+ to_save->ss = from_save->ss;
+ to_save->ds = from_save->ds;
+ to_save->gdtr = from_save->gdtr;
+ to_save->idtr = from_save->idtr;
+ to_save->rflags = from_save->rflags | X86_EFLAGS_FIXED;
+ to_save->efer = from_save->efer;
+ to_save->cr0 = from_save->cr0;
+ to_save->cr3 = from_save->cr3;
+ to_save->cr4 = from_save->cr4;
+ to_save->rax = from_save->rax;
+ to_save->rsp = from_save->rsp;
+ to_save->rip = from_save->rip;
+ to_save->cpl = 0;
+}
+
+void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
{
to_vmcb->save.fs = from_vmcb->save.fs;
to_vmcb->save.gs = from_vmcb->save.gs;
@@ -1355,28 +1385,11 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
- svm->vmcb01.ptr->save.es = save->es;
- svm->vmcb01.ptr->save.cs = save->cs;
- svm->vmcb01.ptr->save.ss = save->ss;
- svm->vmcb01.ptr->save.ds = save->ds;
- svm->vmcb01.ptr->save.gdtr = save->gdtr;
- svm->vmcb01.ptr->save.idtr = save->idtr;
- svm->vmcb01.ptr->save.rflags = save->rflags | X86_EFLAGS_FIXED;
- svm->vmcb01.ptr->save.efer = save->efer;
- svm->vmcb01.ptr->save.cr0 = save->cr0;
- svm->vmcb01.ptr->save.cr3 = save->cr3;
- svm->vmcb01.ptr->save.cr4 = save->cr4;
- svm->vmcb01.ptr->save.rax = save->rax;
- svm->vmcb01.ptr->save.rsp = save->rsp;
- svm->vmcb01.ptr->save.rip = save->rip;
- svm->vmcb01.ptr->save.cpl = 0;
-
+ svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
nested_load_control_from_vmcb12(svm, ctl);
svm_switch_vmcb(svm, &svm->nested.vmcb02);
-
nested_vmcb02_prepare_control(svm);
-
kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
ret = 0;
out_free:
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 62926f1a5f7b..6710d9ee2e4b 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1272,8 +1272,8 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Pin guest memory */
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
PAGE_SIZE, &n, 0);
- if (!guest_page)
- return -EFAULT;
+ if (IS_ERR(guest_page))
+ return PTR_ERR(guest_page);
/* allocate memory for header and transport buffer */
ret = -ENOMEM;
@@ -1310,8 +1310,9 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
}
/* Copy packet header to userspace. */
- ret = copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
- params.hdr_len);
+ if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
+ params.hdr_len))
+ ret = -EFAULT;
e_free_trans_data:
kfree(trans_data);
@@ -1463,11 +1464,12 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
data.trans_len = params.trans_len;
/* Pin guest memory */
- ret = -EFAULT;
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
PAGE_SIZE, &n, 0);
- if (!guest_page)
+ if (IS_ERR(guest_page)) {
+ ret = PTR_ERR(guest_page);
goto e_free_trans;
+ }
/* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 8834822c00cd..e8ccab50ebf6 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -198,6 +198,11 @@ module_param(avic, bool, 0444);
bool __read_mostly dump_invalid_vmcb;
module_param(dump_invalid_vmcb, bool, 0644);
+
+bool intercept_smi = true;
+module_param(intercept_smi, bool, 0444);
+
+
static bool svm_gp_erratum_intercept = true;
static u8 rsm_ins_bytes[] = "\x0f\xaa";
@@ -1185,7 +1190,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
svm_set_intercept(svm, INTERCEPT_INTR);
svm_set_intercept(svm, INTERCEPT_NMI);
- svm_set_intercept(svm, INTERCEPT_SMI);
+
+ if (intercept_smi)
+ svm_set_intercept(svm, INTERCEPT_SMI);
+
svm_set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
svm_set_intercept(svm, INTERCEPT_RDPMC);
svm_set_intercept(svm, INTERCEPT_CPUID);
@@ -1398,8 +1406,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
goto error_free_vmsa_page;
}
- svm_vcpu_init_msrpm(vcpu, svm->msrpm);
-
svm->vmcb01.ptr = page_address(vmcb01_page);
svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT);
@@ -1411,6 +1417,8 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
svm_switch_vmcb(svm, &svm->vmcb01);
init_vmcb(vcpu);
+ svm_vcpu_init_msrpm(vcpu, svm->msrpm);
+
svm_init_osvw(vcpu);
vcpu->arch.microcode_version = 0x01000065;
@@ -1560,8 +1568,11 @@ static void svm_set_vintr(struct vcpu_svm *svm)
{
struct vmcb_control_area *control;
- /* The following fields are ignored when AVIC is enabled */
- WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu));
+ /*
+ * The following fields are ignored when AVIC is enabled
+ */
+ WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
+
svm_set_intercept(svm, INTERCEPT_VINTR);
/*
@@ -1923,7 +1934,7 @@ static int npf_interception(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
+ u64 fault_address = svm->vmcb->control.exit_info_2;
u64 error_code = svm->vmcb->control.exit_info_1;
trace_kvm_page_fault(fault_address, error_code);
@@ -2106,6 +2117,11 @@ static int nmi_interception(struct kvm_vcpu *vcpu)
return 1;
}
+static int smi_interception(struct kvm_vcpu *vcpu)
+{
+ return 1;
+}
+
static int intr_interception(struct kvm_vcpu *vcpu)
{
++vcpu->stat.irq_exits;
@@ -2134,11 +2150,12 @@ static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload)
ret = kvm_skip_emulated_instruction(vcpu);
if (vmload) {
- nested_svm_vmloadsave(vmcb12, svm->vmcb);
+ svm_copy_vmloadsave_state(svm->vmcb, vmcb12);
svm->sysenter_eip_hi = 0;
svm->sysenter_esp_hi = 0;
- } else
- nested_svm_vmloadsave(svm->vmcb, vmcb12);
+ } else {
+ svm_copy_vmloadsave_state(vmcb12, svm->vmcb);
+ }
kvm_vcpu_unmap(vcpu, &map, true);
@@ -2941,7 +2958,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm_disable_lbrv(vcpu);
break;
case MSR_VM_HSAVE_PA:
- svm->nested.hsave_msr = data;
+ /*
+ * Old kernels did not validate the value written to
+ * MSR_VM_HSAVE_PA. Allow KVM_SET_MSR to set an invalid
+ * value to allow live migrating buggy or malicious guests
+ * originating from those kernels.
+ */
+ if (!msr->host_initiated && !page_address_valid(vcpu, data))
+ return 1;
+
+ svm->nested.hsave_msr = data & PAGE_MASK;
break;
case MSR_VM_CR:
return svm_set_vm_cr(vcpu, data);
@@ -3080,8 +3106,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception,
[SVM_EXIT_INTR] = intr_interception,
[SVM_EXIT_NMI] = nmi_interception,
- [SVM_EXIT_SMI] = kvm_emulate_as_nop,
- [SVM_EXIT_INIT] = kvm_emulate_as_nop,
+ [SVM_EXIT_SMI] = smi_interception,
[SVM_EXIT_VINTR] = interrupt_window_interception,
[SVM_EXIT_RDPMC] = kvm_emulate_rdpmc,
[SVM_EXIT_CPUID] = kvm_emulate_cpuid,
@@ -4288,6 +4313,7 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct kvm_host_map map_save;
int ret;
if (is_guest_mode(vcpu)) {
@@ -4303,6 +4329,29 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
ret = nested_svm_vmexit(svm);
if (ret)
return ret;
+
+ /*
+ * KVM uses VMCB01 to store L1 host state while L2 runs but
+ * VMCB01 is going to be used during SMM and thus the state will
+ * be lost. Temporary save non-VMLOAD/VMSAVE state to the host save
+ * area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the
+ * format of the area is identical to guest save area offsetted
+ * by 0x400 (matches the offset of 'struct vmcb_save_area'
+ * within 'struct vmcb'). Note: HSAVE area may also be used by
+ * L1 hypervisor to save additional host context (e.g. KVM does
+ * that, see svm_prepare_guest_switch()) which must be
+ * preserved.
+ */
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
+ &map_save) == -EINVAL)
+ return 1;
+
+ BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
+
+ svm_copy_vmrun_state(map_save.hva + 0x400,
+ &svm->vmcb01.ptr->save);
+
+ kvm_vcpu_unmap(vcpu, &map_save, true);
}
return 0;
}
@@ -4310,13 +4359,14 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct kvm_host_map map;
+ struct kvm_host_map map, map_save;
int ret = 0;
if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
+ struct vmcb *vmcb12;
if (guest) {
if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
@@ -4332,8 +4382,25 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
if (svm_allocate_nested(svm))
return 1;
- ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, map.hva);
+ vmcb12 = map.hva;
+
+ nested_load_control_from_vmcb12(svm, &vmcb12->control);
+
+ ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12);
kvm_vcpu_unmap(vcpu, &map, true);
+
+ /*
+ * Restore L1 host state from L1 HSAVE area as VMCB01 was
+ * used during SMM (see svm_enter_smm())
+ */
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
+ &map_save) == -EINVAL)
+ return 1;
+
+ svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
+ map_save.hva + 0x400);
+
+ kvm_vcpu_unmap(vcpu, &map_save, true);
}
}
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f89b623bb591..bd0fe94c2920 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -31,6 +31,7 @@
#define MSRPM_OFFSETS 16
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
+extern bool intercept_smi;
/*
* Clean bits in VMCB.
@@ -463,7 +464,9 @@ void svm_leave_nested(struct vcpu_svm *svm);
void svm_free_nested(struct vcpu_svm *svm);
int svm_allocate_nested(struct vcpu_svm *svm);
int nested_svm_vmrun(struct kvm_vcpu *vcpu);
-void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
+void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
+ struct vmcb_save_area *from_save);
+void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
int nested_svm_vmexit(struct vcpu_svm *svm);
static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
@@ -479,6 +482,8 @@ int nested_svm_check_permissions(struct kvm_vcpu *vcpu);
int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code);
int nested_svm_exit_special(struct vcpu_svm *svm);
+void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
+ struct vmcb_control_area *control);
void nested_sync_control_from_vmcb02(struct vcpu_svm *svm);
void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm);
void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index 9b9a55abc29f..c53b8bf8d013 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -89,7 +89,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments(
* as we mark it dirty unconditionally towards end of vcpu
* init phase.
*/
- if (vmcb && vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
+ if (vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
hve->hv_enlightenments_control.msr_bitmap)
vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
}
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 3979a947933a..db88ed4f2121 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -14,8 +14,6 @@
#include "vmx_ops.h"
#include "cpuid.h"
-extern const u32 vmx_msr_index[];
-
#define MSR_TYPE_R 1
#define MSR_TYPE_W 2
#define MSR_TYPE_RW 3
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c6dc1b445231..4116567f3d44 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3407,7 +3407,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
break;
case MSR_KVM_ASYNC_PF_ACK:
- if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
return 1;
if (data & 0x1) {
vcpu->arch.apf.pageready_pending = false;
@@ -3746,7 +3746,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.apf.msr_int_val;
break;
case MSR_KVM_ASYNC_PF_ACK:
- if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
return 1;
msr_info->data = 0;
@@ -9601,6 +9601,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
set_debugreg(vcpu->arch.eff_db[3], 3);
set_debugreg(vcpu->arch.dr6, 6);
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
+ } else if (unlikely(hw_breakpoint_active())) {
+ set_debugreg(0, 7);
}
for (;;) {
@@ -10985,9 +10987,6 @@ int kvm_arch_hardware_setup(void *opaque)
int r;
rdmsrl_safe(MSR_EFER, &host_efer);
- if (WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_NX) &&
- !(host_efer & EFER_NX)))
- return -EIO;
if (boot_cpu_has(X86_FEATURE_XSAVES))
rdmsrl(MSR_IA32_XSS, host_xss);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 3364fe62b903..3481b35cb4ec 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -682,7 +682,6 @@ int p4d_clear_huge(p4d_t *p4d)
}
#endif
-#if CONFIG_PGTABLE_LEVELS > 3
/**
* pud_set_huge - setup kernel PUD mapping
*
@@ -722,23 +721,6 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
}
/**
- * pud_clear_huge - clear kernel PUD mapping when it is set
- *
- * Returns 1 on success and 0 on failure (no PUD map is found).
- */
-int pud_clear_huge(pud_t *pud)
-{
- if (pud_large(*pud)) {
- pud_clear(pud);
- return 1;
- }
-
- return 0;
-}
-#endif
-
-#if CONFIG_PGTABLE_LEVELS > 2
-/**
* pmd_set_huge - setup kernel PMD mapping
*
* See text over pud_set_huge() above.
@@ -769,6 +751,21 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
}
/**
+ * pud_clear_huge - clear kernel PUD mapping when it is set
+ *
+ * Returns 1 on success and 0 on failure (no PUD map is found).
+ */
+int pud_clear_huge(pud_t *pud)
+{
+ if (pud_large(*pud)) {
+ pud_clear(pud);
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
* pmd_clear_huge - clear kernel PMD mapping when it is set
*
* Returns 1 on success and 0 on failure (no PMD map is found).
@@ -782,7 +779,6 @@ int pmd_clear_huge(pmd_t *pmd)
return 0;
}
-#endif
#ifdef CONFIG_X86_64
/**
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index e835164189f1..16d76f814e9b 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -570,6 +570,9 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
for (i = 0; i < prog->aux->size_poke_tab; i++) {
poke = &prog->aux->poke_tab[i];
+ if (poke->aux && poke->aux != prog->aux)
+ continue;
+
WARN_ON_ONCE(READ_ONCE(poke->tailcall_target_stable));
if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
@@ -1216,6 +1219,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
}
break;
+ /* speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ if (boot_cpu_has(X86_FEATURE_XMM2))
+ /* Emit 'lfence' */
+ EMIT3(0x0F, 0xAE, 0xE8);
+ break;
+
/* ST: *(u8*)(dst_reg + off) = imm */
case BPF_ST | BPF_MEM | BPF_B:
if (is_ereg(dst_reg))
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 3da88ded6ee3..3bfda5f502cb 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -1886,6 +1886,12 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
i++;
break;
}
+ /* speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ if (boot_cpu_has(X86_FEATURE_XMM2))
+ /* Emit 'lfence' */
+ EMIT3(0x0F, 0xAE, 0xE8);
+ break;
/* ST: *(u8*)(dst_reg + off) = imm */
case BPF_ST | BPF_MEM | BPF_H:
case BPF_ST | BPF_MEM | BPF_B:

Privacy Policy