aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-09-27 13:58:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-09-27 13:58:23 -0700
commit9cccec2bf32fa2a8039cfcd228b9f3a4f0a4f5aa (patch)
treee6fd8df31326ba59abb1e053110467884c2f79b0 /arch
parente6609f2c07de03b948fd6c37c5eb4ade3a6d785c (diff)
parent50b078184604fea95adbb144ff653912fb0e48c6 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini: "A bit late... I got sidetracked by back-from-vacation routines and conferences. But most of these patches are already a few weeks old and things look more calm on the mailing list than what this pull request would suggest. x86: - missing TLB flush - nested virtualization fixes for SMM (secure boot on nested hypervisor) and other nested SVM fixes - syscall fuzzing fixes - live migration fix for AMD SEV - mirror VMs now work for SEV-ES too - fixes for reset - possible out-of-bounds access in IOAPIC emulation - fix enlightened VMCS on Windows 2022 ARM: - Add missing FORCE target when building the EL2 object - Fix a PMU probe regression on some platforms Generic: - KCSAN fixes selftests: - random fixes, mostly for clang compilation" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (43 commits) selftests: KVM: Explicitly use movq to read xmm registers selftests: KVM: Call ucall_init when setting up in rseq_test KVM: Remove tlbs_dirty KVM: X86: Synchronize the shadow pagetable before link it KVM: X86: Fix missed remote tlb flush in rmap_write_protect() KVM: x86: nSVM: don't copy virt_ext from vmcb12 KVM: x86: nSVM: test eax for 4K alignment for GP errata workaround KVM: x86: selftests: test simultaneous uses of V_IRQ from L1 and L0 KVM: x86: nSVM: restore int_vector in svm_clear_vintr kvm: x86: Add AMD PMU MSRs to msrs_to_save_all[] KVM: x86: nVMX: re-evaluate emulation_required on nested VM exit KVM: x86: nVMX: don't fail nested VM entry on invalid guest state if !from_vmentry KVM: x86: VMX: synthesize invalid VM exit when emulating invalid guest state KVM: x86: nSVM: refactor svm_leave_smm and smm_enter_smm KVM: x86: SVM: call KVM_REQ_GET_NESTED_STATE_PAGES on exit from SMM mode KVM: x86: reset pdptrs_from_userspace when exiting smm KVM: x86: nSVM: restore the L1 host state prior to resuming nested guest on SMM exit KVM: nVMX: Filter out all unsupported controls when eVMCS was activated KVM: KVM: Use cpumask_available() to check for NULL cpumask when kicking vCPUs KVM: Clean up benign vcpu->cpu data races when kicking vCPUs ...
Diffstat (limited to 'arch')
-rw-r--r--arch/arm64/kvm/hyp/nvhe/Makefile2
-rw-r--r--arch/arm64/kvm/perf.c3
-rw-r--r--arch/arm64/kvm/pmu-emul.c9
-rw-r--r--arch/s390/kvm/interrupt.c4
-rw-r--r--arch/s390/kvm/kvm-s390.c2
-rw-r--r--arch/s390/kvm/kvm-s390.h2
-rw-r--r--arch/x86/include/asm/kvm_page_track.h2
-rw-r--r--arch/x86/kvm/emulate.c2
-rw-r--r--arch/x86/kvm/hyperv.c7
-rw-r--r--arch/x86/kvm/hyperv.h2
-rw-r--r--arch/x86/kvm/ioapic.c10
-rw-r--r--arch/x86/kvm/mmu/mmu.c17
-rw-r--r--arch/x86/kvm/mmu/page_track.c4
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h46
-rw-r--r--arch/x86/kvm/svm/nested.c10
-rw-r--r--arch/x86/kvm/svm/sev.c92
-rw-r--r--arch/x86/kvm/svm/svm.c137
-rw-r--r--arch/x86/kvm/svm/svm.h3
-rw-r--r--arch/x86/kvm/vmx/evmcs.c12
-rw-r--r--arch/x86/kvm/vmx/nested.c24
-rw-r--r--arch/x86/kvm/vmx/vmx.c37
-rw-r--r--arch/x86/kvm/vmx/vmx.h5
-rw-r--r--arch/x86/kvm/x86.c28
23 files changed, 280 insertions, 180 deletions
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 5df6193fc430..8d741f71377f 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -54,7 +54,7 @@ $(obj)/kvm_nvhe.tmp.o: $(obj)/hyp.lds $(addprefix $(obj)/,$(hyp-obj)) FORCE
# runtime. Because the hypervisor is part of the kernel binary, relocations
# produce a kernel VA. We enumerate relocations targeting hyp at build time
# and convert the kernel VAs at those positions to hyp VAs.
-$(obj)/hyp-reloc.S: $(obj)/kvm_nvhe.tmp.o $(obj)/gen-hyprel
+$(obj)/hyp-reloc.S: $(obj)/kvm_nvhe.tmp.o $(obj)/gen-hyprel FORCE
$(call if_changed,hyprel)
# 5) Compile hyp-reloc.S and link it into the existing partially linked object.
diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c
index f9bb3b14130e..c84fe24b2ea1 100644
--- a/arch/arm64/kvm/perf.c
+++ b/arch/arm64/kvm/perf.c
@@ -50,9 +50,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
int kvm_perf_init(void)
{
- if (kvm_pmu_probe_pmuver() != ID_AA64DFR0_PMUVER_IMP_DEF && !is_protected_kvm_enabled())
- static_branch_enable(&kvm_arm_pmu_available);
-
return perf_register_guest_info_callbacks(&kvm_guest_cbs);
}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index f5065f23b413..2af3c37445e0 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -740,7 +740,14 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
kvm_pmu_create_perf_event(vcpu, select_idx);
}
-int kvm_pmu_probe_pmuver(void)
+void kvm_host_pmu_init(struct arm_pmu *pmu)
+{
+ if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF &&
+ !kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled())
+ static_branch_enable(&kvm_arm_pmu_available);
+}
+
+static int kvm_pmu_probe_pmuver(void)
{
struct perf_event_attr attr = { };
struct perf_event *event;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 16256e17a544..10722455fd02 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -419,13 +419,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
static void __set_cpu_idle(struct kvm_vcpu *vcpu)
{
kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
- set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
+ set_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask);
}
static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
{
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
- clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
+ clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask);
}
static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 752a0ffab9bf..6a6dd5e1daf6 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -4066,7 +4066,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
kvm_s390_patch_guest_per_regs(vcpu);
}
- clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);
+ clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
vcpu->arch.sie_block->icptcode = 0;
cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index ecd741ee3276..52bc8fbaa60a 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -79,7 +79,7 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
{
- return test_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
+ return test_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask);
}
static inline int kvm_is_ucontrol(struct kvm *kvm)
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index 87bd6025d91d..6a5f3acf2b33 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -46,7 +46,7 @@ struct kvm_page_track_notifier_node {
struct kvm_page_track_notifier_node *node);
};
-void kvm_page_track_init(struct kvm *kvm);
+int kvm_page_track_init(struct kvm *kvm);
void kvm_page_track_cleanup(struct kvm *kvm);
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 2837110e66ed..c589ac832265 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4206,7 +4206,7 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
- return emulate_ud(ctxt);
+ return emulate_gp(ctxt, 0);
return X86EMUL_CONTINUE;
}
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 232a86a6faaf..d5124b520f76 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -939,7 +939,7 @@ static int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
stimer_init(&hv_vcpu->stimer[i], i);
- hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
+ hv_vcpu->vp_index = vcpu->vcpu_idx;
return 0;
}
@@ -1444,7 +1444,6 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
switch (msr) {
case HV_X64_MSR_VP_INDEX: {
struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
- int vcpu_idx = kvm_vcpu_get_idx(vcpu);
u32 new_vp_index = (u32)data;
if (!host || new_vp_index >= KVM_MAX_VCPUS)
@@ -1459,9 +1458,9 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
* VP index is changing, adjust num_mismatched_vp_indexes if
* it now matches or no longer matches vcpu_idx.
*/
- if (hv_vcpu->vp_index == vcpu_idx)
+ if (hv_vcpu->vp_index == vcpu->vcpu_idx)
atomic_inc(&hv->num_mismatched_vp_indexes);
- else if (new_vp_index == vcpu_idx)
+ else if (new_vp_index == vcpu->vcpu_idx)
atomic_dec(&hv->num_mismatched_vp_indexes);
hv_vcpu->vp_index = new_vp_index;
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 730da8537d05..ed1c4e546d04 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -83,7 +83,7 @@ static inline u32 kvm_hv_get_vpindex(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
- return hv_vcpu ? hv_vcpu->vp_index : kvm_vcpu_get_idx(vcpu);
+ return hv_vcpu ? hv_vcpu->vp_index : vcpu->vcpu_idx;
}
int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index ff005fe738a4..8c065da73f8e 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -319,8 +319,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
unsigned index;
bool mask_before, mask_after;
union kvm_ioapic_redirect_entry *e;
- unsigned long vcpu_bitmap;
int old_remote_irr, old_delivery_status, old_dest_id, old_dest_mode;
+ DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
switch (ioapic->ioregsel) {
case IOAPIC_REG_VERSION:
@@ -384,9 +384,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
irq.shorthand = APIC_DEST_NOSHORT;
irq.dest_id = e->fields.dest_id;
irq.msi_redir_hint = false;
- bitmap_zero(&vcpu_bitmap, 16);
+ bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
- &vcpu_bitmap);
+ vcpu_bitmap);
if (old_dest_mode != e->fields.dest_mode ||
old_dest_id != e->fields.dest_id) {
/*
@@ -399,10 +399,10 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
kvm_lapic_irq_dest_mode(
!!e->fields.dest_mode);
kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
- &vcpu_bitmap);
+ vcpu_bitmap);
}
kvm_make_scan_ioapic_request_mask(ioapic->kvm,
- &vcpu_bitmap);
+ vcpu_bitmap);
} else {
kvm_make_scan_ioapic_request(ioapic->kvm);
}
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 2d7e61122af8..1a64ba5b9437 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2027,8 +2027,8 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
} while (!sp->unsync_children);
}
-static void mmu_sync_children(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *parent)
+static int mmu_sync_children(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *parent, bool can_yield)
{
int i;
struct kvm_mmu_page *sp;
@@ -2055,12 +2055,18 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
}
if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) {
kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
+ if (!can_yield) {
+ kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+ return -EINTR;
+ }
+
cond_resched_rwlock_write(&vcpu->kvm->mmu_lock);
flush = false;
}
}
kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
+ return 0;
}
static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
@@ -2146,9 +2152,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
- if (sp->unsync_children)
- kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
-
__clear_sp_write_flooding_count(sp);
trace_get_page:
@@ -3684,7 +3687,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
write_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
- mmu_sync_children(vcpu, sp);
+ mmu_sync_children(vcpu, sp, true);
kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
write_unlock(&vcpu->kvm->mmu_lock);
@@ -3700,7 +3703,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
if (IS_VALID_PAE_ROOT(root)) {
root &= PT64_BASE_ADDR_MASK;
sp = to_shadow_page(root);
- mmu_sync_children(vcpu, sp);
+ mmu_sync_children(vcpu, sp, true);
}
}
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index 269f11f92fd0..21427e84a82e 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -164,13 +164,13 @@ void kvm_page_track_cleanup(struct kvm *kvm)
cleanup_srcu_struct(&head->track_srcu);
}
-void kvm_page_track_init(struct kvm *kvm)
+int kvm_page_track_init(struct kvm *kvm)
{
struct kvm_page_track_notifier_head *head;
head = &kvm->arch.track_notifier_head;
- init_srcu_struct(&head->track_srcu);
INIT_HLIST_HEAD(&head->track_notifier_list);
+ return init_srcu_struct(&head->track_srcu);
}
/*
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 7d03e9b7ccfa..913d52a7923e 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -707,8 +707,27 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
if (!is_shadow_present_pte(*it.sptep)) {
table_gfn = gw->table_gfn[it.level - 2];
access = gw->pt_access[it.level - 2];
- sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
- false, access);
+ sp = kvm_mmu_get_page(vcpu, table_gfn, addr,
+ it.level-1, false, access);
+ /*
+ * We must synchronize the pagetable before linking it
+ * because the guest doesn't need to flush tlb when
+ * the gpte is changed from non-present to present.
+ * Otherwise, the guest may use the wrong mapping.
+ *
+ * For PG_LEVEL_4K, kvm_mmu_get_page() has already
+ * synchronized it transiently via kvm_sync_page().
+ *
+ * For higher level pagetable, we synchronize it via
+ * the slower mmu_sync_children(). If it needs to
+ * break, some progress has been made; return
+ * RET_PF_RETRY and retry on the next #PF.
+ * KVM_REQ_MMU_SYNC is not necessary but it
+ * expedites the process.
+ */
+ if (sp->unsync_children &&
+ mmu_sync_children(vcpu, sp, false))
+ return RET_PF_RETRY;
}
/*
@@ -1047,14 +1066,6 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr,
* Using the cached information from sp->gfns is safe because:
* - The spte has a reference to the struct page, so the pfn for a given gfn
* can't change unless all sptes pointing to it are nuked first.
- *
- * Note:
- * We should flush all tlbs if spte is dropped even though guest is
- * responsible for it. Since if we don't, kvm_mmu_notifier_invalidate_page
- * and kvm_mmu_notifier_invalidate_range_start detect the mapping page isn't
- * used by guest then tlbs are not flushed, so guest is allowed to access the
- * freed pages.
- * And we increase kvm->tlbs_dirty to delay tlbs flush in this case.
*/
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
@@ -1107,13 +1118,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
return 0;
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
- /*
- * Update spte before increasing tlbs_dirty to make
- * sure no tlb flush is lost after spte is zapped; see
- * the comments in kvm_flush_remote_tlbs().
- */
- smp_wmb();
- vcpu->kvm->tlbs_dirty++;
+ set_spte_ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
continue;
}
@@ -1128,12 +1133,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
if (gfn != sp->gfns[i]) {
drop_spte(vcpu->kvm, &sp->spt[i]);
- /*
- * The same as above where we are doing
- * prefetch_invalid_gpte().
- */
- smp_wmb();
- vcpu->kvm->tlbs_dirty++;
+ set_spte_ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
continue;
}
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 2545d0c61985..510b833cbd39 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -545,7 +545,6 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
(svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
(svm->vmcb01.ptr->control.int_ctl & int_ctl_vmcb01_bits);
- svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext;
svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
svm->vmcb->control.int_state = svm->nested.ctl.int_state;
svm->vmcb->control.event_inj = svm->nested.ctl.event_inj;
@@ -579,7 +578,7 @@ static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to
}
int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
- struct vmcb *vmcb12)
+ struct vmcb *vmcb12, bool from_vmrun)
{
struct vcpu_svm *svm = to_svm(vcpu);
int ret;
@@ -609,13 +608,16 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
nested_vmcb02_prepare_save(svm, vmcb12);
ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
- nested_npt_enabled(svm), true);
+ nested_npt_enabled(svm), from_vmrun);
if (ret)
return ret;
if (!npt_enabled)
vcpu->arch.mmu->inject_page_fault = svm_inject_page_fault_nested;
+ if (!from_vmrun)
+ kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+
svm_set_gif(svm, true);
return 0;
@@ -681,7 +683,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
svm->nested.nested_run_pending = 1;
- if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12))
+ if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, true))
goto out_exit_err;
if (nested_svm_vmrun_msrpm(svm))
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 75e0b21ad07c..c36b5fe4c27c 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -595,43 +595,50 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
return 0;
}
-static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
+static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
+ int *error)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_update_vmsa vmsa;
+ struct vcpu_svm *svm = to_svm(vcpu);
+ int ret;
+
+ /* Perform some pre-encryption checks against the VMSA */
+ ret = sev_es_sync_vmsa(svm);
+ if (ret)
+ return ret;
+
+ /*
+ * The LAUNCH_UPDATE_VMSA command will perform in-place encryption of
+ * the VMSA memory content (i.e it will write the same memory region
+ * with the guest's key), so invalidate it first.
+ */
+ clflush_cache_range(svm->vmsa, PAGE_SIZE);
+
+ vmsa.reserved = 0;
+ vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
+ vmsa.address = __sme_pa(svm->vmsa);
+ vmsa.len = PAGE_SIZE;
+ return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
+}
+
+static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
struct kvm_vcpu *vcpu;
int i, ret;
if (!sev_es_guest(kvm))
return -ENOTTY;
- vmsa.reserved = 0;
-
kvm_for_each_vcpu(i, vcpu, kvm) {
- struct vcpu_svm *svm = to_svm(vcpu);
-
- /* Perform some pre-encryption checks against the VMSA */
- ret = sev_es_sync_vmsa(svm);
+ ret = mutex_lock_killable(&vcpu->mutex);
if (ret)
return ret;
- /*
- * The LAUNCH_UPDATE_VMSA command will perform in-place
- * encryption of the VMSA memory content (i.e it will write
- * the same memory region with the guest's key), so invalidate
- * it first.
- */
- clflush_cache_range(svm->vmsa, PAGE_SIZE);
+ ret = __sev_launch_update_vmsa(kvm, vcpu, &argp->error);
- vmsa.handle = sev->handle;
- vmsa.address = __sme_pa(svm->vmsa);
- vmsa.len = PAGE_SIZE;
- ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa,
- &argp->error);
+ mutex_unlock(&vcpu->mutex);
if (ret)
return ret;
-
- svm->vcpu.arch.guest_state_protected = true;
}
return 0;
@@ -1397,8 +1404,10 @@ static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Bind ASID to this guest */
ret = sev_bind_asid(kvm, start.handle, error);
- if (ret)
+ if (ret) {
+ sev_decommission(start.handle);
goto e_free_session;
+ }
params.handle = start.handle;
if (copy_to_user((void __user *)(uintptr_t)argp->data,
@@ -1464,7 +1473,7 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Pin guest memory */
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
- PAGE_SIZE, &n, 0);
+ PAGE_SIZE, &n, 1);
if (IS_ERR(guest_page)) {
ret = PTR_ERR(guest_page);
goto e_free_trans;
@@ -1501,6 +1510,20 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
}
+static bool cmd_allowed_from_miror(u32 cmd_id)
+{
+ /*
+ * Allow mirrors VM to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES
+ * active mirror VMs. Also allow the debugging and status commands.
+ */
+ if (cmd_id == KVM_SEV_LAUNCH_UPDATE_VMSA ||
+ cmd_id == KVM_SEV_GUEST_STATUS || cmd_id == KVM_SEV_DBG_DECRYPT ||
+ cmd_id == KVM_SEV_DBG_ENCRYPT)
+ return true;
+
+ return false;
+}
+
int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
{
struct kvm_sev_cmd sev_cmd;
@@ -1517,8 +1540,9 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
mutex_lock(&kvm->lock);
- /* enc_context_owner handles all memory enc operations */
- if (is_mirroring_enc_context(kvm)) {
+ /* Only the enc_context_owner handles some memory enc operations. */
+ if (is_mirroring_enc_context(kvm) &&
+ !cmd_allowed_from_miror(sev_cmd.id)) {
r = -EINVAL;
goto out;
}
@@ -1715,8 +1739,7 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
{
struct file *source_kvm_file;
struct kvm *source_kvm;
- struct kvm_sev_info *mirror_sev;
- unsigned int asid;
+ struct kvm_sev_info source_sev, *mirror_sev;
int ret;
source_kvm_file = fget(source_fd);
@@ -1739,7 +1762,8 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
goto e_source_unlock;
}
- asid = to_kvm_svm(source_kvm)->sev_info.asid;
+ memcpy(&source_sev, &to_kvm_svm(source_kvm)->sev_info,
+ sizeof(source_sev));
/*
* The mirror kvm holds an enc_context_owner ref so its asid can't
@@ -1759,8 +1783,16 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
/* Set enc_context_owner and copy its encryption context over */
mirror_sev = &to_kvm_svm(kvm)->sev_info;
mirror_sev->enc_context_owner = source_kvm;
- mirror_sev->asid = asid;
mirror_sev->active = true;
+ mirror_sev->asid = source_sev.asid;
+ mirror_sev->fd = source_sev.fd;
+ mirror_sev->es_active = source_sev.es_active;
+ mirror_sev->handle = source_sev.handle;
+ /*
+ * Do not copy ap_jump_table. Since the mirror does not share the same
+ * KVM contexts as the original, and they may have different
+ * memory-views.
+ */
mutex_unlock(&kvm->lock);
return 0;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 05e8d4d27969..989685098b3e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1566,6 +1566,8 @@ static void svm_clear_vintr(struct vcpu_svm *svm)
svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl &
V_IRQ_INJECTION_BITS_MASK;
+
+ svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
}
vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
@@ -2222,6 +2224,10 @@ static int gp_interception(struct kvm_vcpu *vcpu)
if (error_code)
goto reinject;
+ /* All SVM instructions expect page aligned RAX */
+ if (svm->vmcb->save.rax & ~PAGE_MASK)
+ goto reinject;
+
/* Decode the instruction for usage later */
if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
goto reinject;
@@ -4285,43 +4291,44 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
struct kvm_host_map map_save;
int ret;
- if (is_guest_mode(vcpu)) {
- /* FED8h - SVM Guest */
- put_smstate(u64, smstate, 0x7ed8, 1);
- /* FEE0h - SVM Guest VMCB Physical Address */
- put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
+ if (!is_guest_mode(vcpu))
+ return 0;
- svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
- svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
- svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
+ /* FED8h - SVM Guest */
+ put_smstate(u64, smstate, 0x7ed8, 1);
+ /* FEE0h - SVM Guest VMCB Physical Address */
+ put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
- ret = nested_svm_vmexit(svm);
- if (ret)
- return ret;
+ svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
+ svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
+ svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
- /*
- * KVM uses VMCB01 to store L1 host state while L2 runs but
- * VMCB01 is going to be used during SMM and thus the state will
- * be lost. Temporary save non-VMLOAD/VMSAVE state to the host save
- * area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the
- * format of the area is identical to guest save area offsetted
- * by 0x400 (matches the offset of 'struct vmcb_save_area'
- * within 'struct vmcb'). Note: HSAVE area may also be used by
- * L1 hypervisor to save additional host context (e.g. KVM does
- * that, see svm_prepare_guest_switch()) which must be
- * preserved.
- */
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
- &map_save) == -EINVAL)
- return 1;
+ ret = nested_svm_vmexit(svm);
+ if (ret)
+ return ret;
- BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
+ /*
+ * KVM uses VMCB01 to store L1 host state while L2 runs but
+ * VMCB01 is going to be used during SMM and thus the state will
+ * be lost. Temporary save non-VMLOAD/VMSAVE state to the host save
+ * area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the
+ * format of the area is identical to guest save area offsetted
+ * by 0x400 (matches the offset of 'struct vmcb_save_area'
+ * within 'struct vmcb'). Note: HSAVE area may also be used by
+ * L1 hypervisor to save additional host context (e.g. KVM does
+ * that, see svm_prepare_guest_switch()) which must be
+ * preserved.
+ */
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
+ &map_save) == -EINVAL)
+ return 1;
- svm_copy_vmrun_state(map_save.hva + 0x400,
- &svm->vmcb01.ptr->save);
+ BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
- kvm_vcpu_unmap(vcpu, &map_save, true);
- }
+ svm_copy_vmrun_state(map_save.hva + 0x400,
+ &svm->vmcb01.ptr->save);
+
+ kvm_vcpu_unmap(vcpu, &map_save, true);
return 0;
}
@@ -4329,50 +4336,54 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_host_map map, map_save;
- int ret = 0;
+ u64 saved_efer, vmcb12_gpa;
+ struct vmcb *vmcb12;
+ int ret;
- if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
- u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
- u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
- u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
- struct vmcb *vmcb12;
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ return 0;
- if (guest) {
- if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
- return 1;
+ /* Non-zero if SMI arrived while vCPU was in guest mode. */
+ if (!GET_SMSTATE(u64, smstate, 0x7ed8))
+ return 0;
- if (!(saved_efer & EFER_SVME))
- return 1;
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
+ return 1;
- if (kvm_vcpu_map(vcpu,
- gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
- return 1;
+ saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
+ if (!(saved_efer & EFER_SVME))
+ return 1;
- if (svm_allocate_nested(svm))
- return 1;
+ vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
+ return 1;
- vmcb12 = map.hva;
+ ret = 1;
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save) == -EINVAL)
+ goto unmap_map;
- nested_load_control_from_vmcb12(svm, &vmcb12->control);
+ if (svm_allocate_nested(svm))
+ goto unmap_save;
- ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12);
- kvm_vcpu_unmap(vcpu, &map, true);
+ /*
+ * Restore L1 host state from L1 HSAVE area as VMCB01 was
+ * used during SMM (see svm_enter_smm())
+ */
- /*
- * Restore L1 host state from L1 HSAVE area as VMCB01 was
- * used during SMM (see svm_enter_smm())
- */
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
- &map_save) == -EINVAL)
- return 1;
+ svm_copy_vmrun_state(&svm->vmcb01.ptr->save, map_save.hva + 0x400);
- svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
- map_save.hva + 0x400);
+ /*
+ * Enter the nested guest now
+ */
- kvm_vcpu_unmap(vcpu, &map_save, true);
- }
- }
+ vmcb12 = map.hva;
+ nested_load_control_from_vmcb12(svm, &vmcb12->control);
+ ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
+unmap_save:
+ kvm_vcpu_unmap(vcpu, &map_save, true);
+unmap_map:
+ kvm_vcpu_unmap(vcpu, &map, true);
return ret;
}
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 524d943f3efc..128a54b1fbf1 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -459,7 +459,8 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
}
-int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb_gpa, struct vmcb *vmcb12);
+int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
+ u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun);
void svm_leave_nested(struct vcpu_svm *svm);
void svm_free_nested(struct vcpu_svm *svm);
int svm_allocate_nested(struct vcpu_svm *svm);
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index 0dab1b7b529f..ba6f99f584ac 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -353,14 +353,20 @@ void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
switch (msr_index) {
case MSR_IA32_VMX_EXIT_CTLS:
case MSR_IA32_VMX_TRUE_EXIT_CTLS:
- ctl_high &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
+ ctl_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
break;
case MSR_IA32_VMX_ENTRY_CTLS:
case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
- ctl_high &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+ ctl_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
break;
case MSR_IA32_VMX_PROCBASED_CTLS2:
- ctl_high &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ ctl_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
+ break;
+ case MSR_IA32_VMX_PINBASED_CTLS:
+ ctl_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
+ break;
+ case MSR_IA32_VMX_VMFUNC:
+ ctl_low &= ~EVMCS1_UNSUPPORTED_VMFUNC;
break;
}
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index ccb03d69546c..eedcebf58004 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2583,8 +2583,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
* Guest state is invalid and unrestricted guest is disabled,
* which means L1 attempted VMEntry to L2 with invalid state.
* Fail the VMEntry.
+ *
+ * However when force loading the guest state (SMM exit or
+ * loading nested state after migration, it is possible to
+ * have invalid guest state now, which will be later fixed by
+ * restoring L2 register state
*/
- if (CC(!vmx_guest_state_valid(vcpu))) {
+ if (CC(from_vmentry && !vmx_guest_state_valid(vcpu))) {
*entry_failure_code = ENTRY_FAIL_DEFAULT;
return -EINVAL;
}
@@ -4351,6 +4356,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
vmcs12->vm_exit_msr_load_count))
nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
+
+ to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu);
}
static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
@@ -4899,14 +4906,7 @@ out_vmcs02:
return -ENOMEM;
}
-/*
- * Emulate the VMXON instruction.
- * Currently, we just remember that VMX is active, and do not save or even
- * inspect the argument to VMXON (the so-called "VMXON pointer") because we
- * do not currently need to store anything in that guest-allocated memory
- * region. Consequently, VMCLEAR and VMPTRLD also do not verify that the their
- * argument is different from the VMXON pointer (which the spec says they do).
- */
+/* Emulate the VMXON instruction. */
static int handle_vmon(struct kvm_vcpu *vcpu)
{
int ret;
@@ -5903,6 +5903,12 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
case EXIT_REASON_VMFUNC:
/* VM functions are emulated through L2->L0 vmexits. */
return true;
+ case EXIT_REASON_BUS_LOCK:
+ /*
+ * At present, bus lock VM exit is never exposed to L1.
+ * Handle L2's bus locks in L0 directly.
+ */
+ return true;
default:
break;
}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 0c2c0d5ae873..9ecfcf13a046 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1323,7 +1323,7 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
vmx_prepare_switch_to_host(to_vmx(vcpu));
}
-static bool emulation_required(struct kvm_vcpu *vcpu)
+bool vmx_emulation_required(struct kvm_vcpu *vcpu)
{
return emulate_invalid_guest_state && !vmx_guest_state_valid(vcpu);
}
@@ -1367,7 +1367,7 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
vmcs_writel(GUEST_RFLAGS, rflags);
if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM)
- vmx->emulation_required = emulation_required(vcpu);
+ vmx->emulation_required = vmx_emulation_required(vcpu);
}
u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
@@ -1837,10 +1837,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
&msr_info->data))
return 1;
/*
- * Enlightened VMCS v1 doesn't have certain fields, but buggy
- * Hyper-V versions are still trying to use corresponding
- * features when they are exposed. Filter out the essential
- * minimum.
+ * Enlightened VMCS v1 doesn't have certain VMCS fields but
+ * instead of just ignoring the features, different Hyper-V
+ * versions are either trying to use them and fail or do some
+ * sanity checking and refuse to boot. Filter all unsupported
+ * features out.
*/
if (!msr_info->host_initiated &&
vmx->nested.enlightened_vmcs_enabled)
@@ -3077,7 +3078,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
}
/* depends on vcpu->arch.cr0 to be set to a new value */
- vmx->emulation_required = emulation_required(vcpu);
+ vmx->emulation_required = vmx_emulation_required(vcpu);
}
static int vmx_get_max_tdp_level(void)
@@ -3330,7 +3331,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int
{
__vmx_set_segment(vcpu, var, seg);
- to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
+ to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu);
}
static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@@ -6621,10 +6622,24 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->loaded_vmcs->soft_vnmi_blocked))
vmx->loaded_vmcs->entry_time = ktime_get();
- /* Don't enter VMX if guest state is invalid, let the exit handler
- start emulation until we arrive back to a valid state */
- if (vmx->emulation_required)
+ /*
+ * Don't enter VMX if guest state is invalid, let the exit handler
+ * start emulation until we arrive back to a valid state. Synthesize a
+ * consistency check VM-Exit due to invalid guest state and bail.
+ */
+ if (unlikely(vmx->emulation_required)) {
+
+ /* We don't emulate invalid state of a nested guest */
+ vmx->fail = is_guest_mode(vcpu);
+
+ vmx->exit_reason.full = EXIT_REASON_INVALID_STATE;
+ vmx->exit_reason.failed_vmentry = 1;
+ kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1);
+ vmx->exit_qualification = ENTRY_FAIL_DEFAULT;
+ kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2);
+ vmx->exit_intr_info = 0;
return EXIT_FASTPATH_NONE;
+ }
trace_kvm_entry(vcpu);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 4858c5fd95f2..592217fd7d92 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -248,12 +248,8 @@ struct vcpu_vmx {
* only loaded into hardware when necessary, e.g. SYSCALL #UDs outside
* of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to
* be loaded into hardware if those conditions aren't met.
- * nr_active_uret_msrs tracks the number of MSRs that need to be loaded
- * into hardware when running the guest. guest_uret_msrs[] is resorted
- * whenever the number of "active" uret MSRs is modified.
*/
struct vmx_uret_msr guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
- int nr_active_uret_msrs;
bool guest_uret_msrs_loaded;
#ifdef CONFIG_X86_64
u64 msr_host_kernel_gs_base;
@@ -359,6 +355,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
unsigned long fs_base, unsigned long gs_base);
int vmx_get_cpl(struct kvm_vcpu *vcpu);
+bool vmx_emulation_required(struct kvm_vcpu *vcpu);
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 28ef14155726..aabd3a2ec1bc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1332,6 +1332,13 @@ static const u32 msrs_to_save_all[] = {
MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
+
+ MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
+ MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
+ MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
+ MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
+ MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
+ MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
};
static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
@@ -2969,7 +2976,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
offsetof(struct compat_vcpu_info, time));
if (vcpu->xen.vcpu_time_info_set)
kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_time_info_cache, 0);
- if (v == kvm_get_vcpu(v->kvm, 0))
+ if (!v->vcpu_idx)
kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
return 0;
}
@@ -7658,6 +7665,13 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
/* Process a latched INIT or SMI, if any. */
kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+ /*
+ * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
+ * on SMM exit we still need to reload them from
+ * guest memory
+ */
+ vcpu->arch.pdptrs_from_userspace = false;
}
kvm_mmu_reset_context(vcpu);
@@ -10652,6 +10666,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
int r;
vcpu->arch.last_vmentry_cpu = -1;
+ vcpu->arch.regs_avail = ~0;
+ vcpu->arch.regs_dirty = ~0;
if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -10893,6 +10909,9 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0xfff0);
+ vcpu->arch.cr3 = 0;
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
+
/*
* CR0.CD/NW are set on RESET, preserved on INIT. Note, some versions
* of Intel's SDM list CD/NW as being set on INIT, but they contradict
@@ -11139,9 +11158,15 @@ void kvm_arch_free_vm(struct kvm *kvm)
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
+ int ret;
+
if (type)
return -EINVAL;
+ ret = kvm_page_track_init(kvm);
+ if (ret)
+ return ret;
+
INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
@@ -11174,7 +11199,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_apicv_init(kvm);
kvm_hv_init_vm(kvm);
- kvm_page_track_init(kvm);
kvm_mmu_init_vm(kvm);
kvm_xen_init_vm(kvm);

Privacy Policy