aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu.c3
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu.c3
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c635
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c1
-rw-r--r--arch/powerpc/kvm/book3s_hv.c65
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c8
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c62
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c138
-rw-r--r--arch/powerpc/kvm/book3s_pr.c130
-rw-r--r--arch/powerpc/kvm/book3s_xics.c192
-rw-r--r--arch/powerpc/kvm/book3s_xics.h7
-rw-r--r--arch/powerpc/kvm/powerpc.c10
12 files changed, 887 insertions, 367 deletions
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index a2eb6d354a57..1992676c7a94 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -224,7 +224,8 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
ptem = kvmppc_mmu_book3s_32_get_ptem(sre, eaddr, primary);
if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
- printk(KERN_ERR "KVM: Can't copy data from 0x%lx!\n", ptegp);
+ printk_ratelimited(KERN_ERR
+ "KVM: Can't copy data from 0x%lx!\n", ptegp);
goto no_page_found;
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index b9131aa1aedf..70153578131a 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -265,7 +265,8 @@ do_second:
goto no_page_found;
if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
- printk(KERN_ERR "KVM can't copy data from 0x%lx!\n", ptegp);
+ printk_ratelimited(KERN_ERR
+ "KVM: Can't copy data from 0x%lx!\n", ptegp);
goto no_page_found;
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 9df3d940acec..f3158fb16de3 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -40,84 +40,101 @@
#include "trace_hv.h"
-/* Power architecture requires HPT is at least 256kB */
-#define PPC_MIN_HPT_ORDER 18
+//#define DEBUG_RESIZE_HPT 1
+
+#ifdef DEBUG_RESIZE_HPT
+#define resize_hpt_debug(resize, ...) \
+ do { \
+ printk(KERN_DEBUG "RESIZE HPT %p: ", resize); \
+ printk(__VA_ARGS__); \
+ } while (0)
+#else
+#define resize_hpt_debug(resize, ...) \
+ do { } while (0)
+#endif
static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh,
unsigned long ptel, unsigned long *pte_idx_ret);
+
+struct kvm_resize_hpt {
+ /* These fields read-only after init */
+ struct kvm *kvm;
+ struct work_struct work;
+ u32 order;
+
+ /* These fields protected by kvm->lock */
+ int error;
+ bool prepare_done;
+
+ /* Private to the work thread, until prepare_done is true,
+ * then protected by kvm->resize_hpt_sem */
+ struct kvm_hpt_info hpt;
+};
+
static void kvmppc_rmap_reset(struct kvm *kvm);
-long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
+int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
{
unsigned long hpt = 0;
- struct revmap_entry *rev;
+ int cma = 0;
struct page *page = NULL;
- long order = KVM_DEFAULT_HPT_ORDER;
+ struct revmap_entry *rev;
+ unsigned long npte;
- if (htab_orderp) {
- order = *htab_orderp;
- if (order < PPC_MIN_HPT_ORDER)
- order = PPC_MIN_HPT_ORDER;
- }
+ if ((order < PPC_MIN_HPT_ORDER) || (order > PPC_MAX_HPT_ORDER))
+ return -EINVAL;
- kvm->arch.hpt_cma_alloc = 0;
- page = kvm_alloc_hpt(1ul << (order - PAGE_SHIFT));
+ page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT));
if (page) {
hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
memset((void *)hpt, 0, (1ul << order));
- kvm->arch.hpt_cma_alloc = 1;
+ cma = 1;
}
- /* Lastly try successively smaller sizes from the page allocator */
- /* Only do this if userspace didn't specify a size via ioctl */
- while (!hpt && order > PPC_MIN_HPT_ORDER && !htab_orderp) {
- hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
- __GFP_NOWARN, order - PAGE_SHIFT);
- if (!hpt)
- --order;
- }
+ if (!hpt)
+ hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT
+ |__GFP_NOWARN, order - PAGE_SHIFT);
if (!hpt)
return -ENOMEM;
- kvm->arch.hpt_virt = hpt;
- kvm->arch.hpt_order = order;
/* HPTEs are 2**4 bytes long */
- kvm->arch.hpt_npte = 1ul << (order - 4);
- /* 128 (2**7) bytes in each HPTEG */
- kvm->arch.hpt_mask = (1ul << (order - 7)) - 1;
-
- atomic64_set(&kvm->arch.mmio_update, 0);
+ npte = 1ul << (order - 4);
/* Allocate reverse map array */
- rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte);
+ rev = vmalloc(sizeof(struct revmap_entry) * npte);
if (!rev) {
- pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
- goto out_freehpt;
+ pr_err("kvmppc_allocate_hpt: Couldn't alloc reverse map array\n");
+ if (cma)
+ kvm_free_hpt_cma(page, 1 << (order - PAGE_SHIFT));
+ else
+ free_pages(hpt, order - PAGE_SHIFT);
+ return -ENOMEM;
}
- kvm->arch.revmap = rev;
- kvm->arch.sdr1 = __pa(hpt) | (order - 18);
- pr_info("KVM guest htab at %lx (order %ld), LPID %x\n",
- hpt, order, kvm->arch.lpid);
+ info->order = order;
+ info->virt = hpt;
+ info->cma = cma;
+ info->rev = rev;
- if (htab_orderp)
- *htab_orderp = order;
return 0;
+}
- out_freehpt:
- if (kvm->arch.hpt_cma_alloc)
- kvm_release_hpt(page, 1 << (order - PAGE_SHIFT));
- else
- free_pages(hpt, order - PAGE_SHIFT);
- return -ENOMEM;
+void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info)
+{
+ atomic64_set(&kvm->arch.mmio_update, 0);
+ kvm->arch.hpt = *info;
+ kvm->arch.sdr1 = __pa(info->virt) | (info->order - 18);
+
+ pr_debug("KVM guest htab at %lx (order %ld), LPID %x\n",
+ info->virt, (long)info->order, kvm->arch.lpid);
}
-long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
+long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
{
long err = -EBUSY;
- long order;
+ struct kvm_hpt_info info;
if (kvm_is_radix(kvm))
return -EINVAL;
@@ -132,36 +149,44 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
goto out;
}
}
- if (kvm->arch.hpt_virt) {
- order = kvm->arch.hpt_order;
+ if (kvm->arch.hpt.order == order) {
+ /* We already have a suitable HPT */
+
/* Set the entire HPT to 0, i.e. invalid HPTEs */
- memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
+ memset((void *)kvm->arch.hpt.virt, 0, 1ul << order);
/*
* Reset all the reverse-mapping chains for all memslots
*/
kvmppc_rmap_reset(kvm);
/* Ensure that each vcpu will flush its TLB on next entry. */
cpumask_setall(&kvm->arch.need_tlb_flush);
- *htab_orderp = order;
err = 0;
- } else {
- err = kvmppc_alloc_hpt(kvm, htab_orderp);
- order = *htab_orderp;
+ goto out;
}
- out:
+
+ if (kvm->arch.hpt.virt)
+ kvmppc_free_hpt(&kvm->arch.hpt);
+
+ err = kvmppc_allocate_hpt(&info, order);
+ if (err < 0)
+ goto out;
+ kvmppc_set_hpt(kvm, &info);
+
+out:
mutex_unlock(&kvm->lock);
return err;
}
-void kvmppc_free_hpt(struct kvm *kvm)
+void kvmppc_free_hpt(struct kvm_hpt_info *info)
{
- vfree(kvm->arch.revmap);
- if (kvm->arch.hpt_cma_alloc)
- kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt),
- 1 << (kvm->arch.hpt_order - PAGE_SHIFT));
- else if (kvm->arch.hpt_virt)
- free_pages(kvm->arch.hpt_virt,
- kvm->arch.hpt_order - PAGE_SHIFT);
+ vfree(info->rev);
+ if (info->cma)
+ kvm_free_hpt_cma(virt_to_page(info->virt),
+ 1 << (info->order - PAGE_SHIFT));
+ else if (info->virt)
+ free_pages(info->virt, info->order - PAGE_SHIFT);
+ info->virt = 0;
+ info->order = 0;
}
/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
@@ -196,8 +221,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
if (npages > 1ul << (40 - porder))
npages = 1ul << (40 - porder);
/* Can't use more than 1 HPTE per HPTEG */
- if (npages > kvm->arch.hpt_mask + 1)
- npages = kvm->arch.hpt_mask + 1;
+ if (npages > kvmppc_hpt_mask(&kvm->arch.hpt) + 1)
+ npages = kvmppc_hpt_mask(&kvm->arch.hpt) + 1;
hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
@@ -207,7 +232,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
for (i = 0; i < npages; ++i) {
addr = i << porder;
/* can't use hpt_hash since va > 64 bits */
- hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & kvm->arch.hpt_mask;
+ hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25)))
+ & kvmppc_hpt_mask(&kvm->arch.hpt);
/*
* We assume that the hash table is empty and no
* vcpus are using it at this stage. Since we create
@@ -340,11 +366,11 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
preempt_enable();
return -ENOENT;
}
- hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
+ hptep = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
v = orig_v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
if (cpu_has_feature(CPU_FTR_ARCH_300))
v = hpte_new_to_old_v(v, be64_to_cpu(hptep[1]));
- gr = kvm->arch.revmap[index].guest_rpte;
+ gr = kvm->arch.hpt.rev[index].guest_rpte;
unlock_hpte(hptep, orig_v);
preempt_enable();
@@ -485,8 +511,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
}
index = vcpu->arch.pgfault_index;
- hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
- rev = &kvm->arch.revmap[index];
+ hptep = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
+ rev = &kvm->arch.hpt.rev[index];
preempt_disable();
while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
cpu_relax();
@@ -745,13 +771,53 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
}
+/* Must be called with both HPTE and rmap locked */
+static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
+ unsigned long *rmapp, unsigned long gfn)
+{
+ __be64 *hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
+ struct revmap_entry *rev = kvm->arch.hpt.rev;
+ unsigned long j, h;
+ unsigned long ptel, psize, rcbits;
+
+ j = rev[i].forw;
+ if (j == i) {
+ /* chain is now empty */
+ *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
+ } else {
+ /* remove i from chain */
+ h = rev[i].back;
+ rev[h].forw = j;
+ rev[j].back = h;
+ rev[i].forw = rev[i].back = i;
+ *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
+ }
+
+ /* Now check and modify the HPTE */
+ ptel = rev[i].guest_rpte;
+ psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
+ if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
+ hpte_rpn(ptel, psize) == gfn) {
+ hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
+ kvmppc_invalidate_hpte(kvm, hptep, i);
+ hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
+ /* Harvest R and C */
+ rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
+ *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+ if (rcbits & HPTE_R_C)
+ kvmppc_update_rmap_change(rmapp, psize);
+ if (rcbits & ~rev[i].guest_rpte) {
+ rev[i].guest_rpte = ptel | rcbits;
+ note_hpte_modification(kvm, &rev[i]);
+ }
+ }
+}
+
static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn)
{
- struct revmap_entry *rev = kvm->arch.revmap;
- unsigned long h, i, j;
+ unsigned long i;
__be64 *hptep;
- unsigned long ptel, psize, rcbits;
unsigned long *rmapp;
rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
@@ -768,7 +834,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
* rmap chain lock.
*/
i = *rmapp & KVMPPC_RMAP_INDEX;
- hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4));
+ hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
/* unlock rmap before spinning on the HPTE lock */
unlock_rmap(rmapp);
@@ -776,37 +842,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
cpu_relax();
continue;
}
- j = rev[i].forw;
- if (j == i) {
- /* chain is now empty */
- *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
- } else {
- /* remove i from chain */
- h = rev[i].back;
- rev[h].forw = j;
- rev[j].back = h;
- rev[i].forw = rev[i].back = i;
- *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
- }
- /* Now check and modify the HPTE */
- ptel = rev[i].guest_rpte;
- psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
- if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
- hpte_rpn(ptel, psize) == gfn) {
- hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
- kvmppc_invalidate_hpte(kvm, hptep, i);
- hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
- /* Harvest R and C */
- rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
- *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
- if (rcbits & HPTE_R_C)
- kvmppc_update_rmap_change(rmapp, psize);
- if (rcbits & ~rev[i].guest_rpte) {
- rev[i].guest_rpte = ptel | rcbits;
- note_hpte_modification(kvm, &rev[i]);
- }
- }
+ kvmppc_unmap_hpte(kvm, i, rmapp, gfn);
unlock_rmap(rmapp);
__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
}
@@ -860,7 +897,7 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn)
{
- struct revmap_entry *rev = kvm->arch.revmap;
+ struct revmap_entry *rev = kvm->arch.hpt.rev;
unsigned long head, i, j;
__be64 *hptep;
int ret = 0;
@@ -880,7 +917,7 @@ static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
i = head = *rmapp & KVMPPC_RMAP_INDEX;
do {
- hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4));
+ hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
j = rev[i].forw;
/* If this HPTE isn't referenced, ignore it */
@@ -923,7 +960,7 @@ int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn)
{
- struct revmap_entry *rev = kvm->arch.revmap;
+ struct revmap_entry *rev = kvm->arch.hpt.rev;
unsigned long head, i, j;
unsigned long *hp;
int ret = 1;
@@ -940,7 +977,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
if (*rmapp & KVMPPC_RMAP_PRESENT) {
i = head = *rmapp & KVMPPC_RMAP_INDEX;
do {
- hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4));
+ hp = (unsigned long *)(kvm->arch.hpt.virt + (i << 4));
j = rev[i].forw;
if (be64_to_cpu(hp[1]) & HPTE_R_R)
goto out;
@@ -980,7 +1017,7 @@ static int vcpus_running(struct kvm *kvm)
*/
static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
{
- struct revmap_entry *rev = kvm->arch.revmap;
+ struct revmap_entry *rev = kvm->arch.hpt.rev;
unsigned long head, i, j;
unsigned long n;
unsigned long v, r;
@@ -1005,7 +1042,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
i = head = *rmapp & KVMPPC_RMAP_INDEX;
do {
unsigned long hptep1;
- hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4));
+ hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
j = rev[i].forw;
/*
@@ -1172,6 +1209,363 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
}
/*
+ * HPT resizing
+ */
+static int resize_hpt_allocate(struct kvm_resize_hpt *resize)
+{
+ int rc;
+
+ rc = kvmppc_allocate_hpt(&resize->hpt, resize->order);
+ if (rc < 0)
+ return rc;
+
+ resize_hpt_debug(resize, "resize_hpt_allocate(): HPT @ 0x%lx\n",
+ resize->hpt.virt);
+
+ return 0;
+}
+
+static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
+ unsigned long idx)
+{
+ struct kvm *kvm = resize->kvm;
+ struct kvm_hpt_info *old = &kvm->arch.hpt;
+ struct kvm_hpt_info *new = &resize->hpt;
+ unsigned long old_hash_mask = (1ULL << (old->order - 7)) - 1;
+ unsigned long new_hash_mask = (1ULL << (new->order - 7)) - 1;
+ __be64 *hptep, *new_hptep;
+ unsigned long vpte, rpte, guest_rpte;
+ int ret;
+ struct revmap_entry *rev;
+ unsigned long apsize, psize, avpn, pteg, hash;
+ unsigned long new_idx, new_pteg, replace_vpte;
+
+ hptep = (__be64 *)(old->virt + (idx << 4));
+
+ /* Guest is stopped, so new HPTEs can't be added or faulted
+ * in, only unmapped or altered by host actions. So, it's
+ * safe to check this before we take the HPTE lock */
+ vpte = be64_to_cpu(hptep[0]);
+ if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT))
+ return 0; /* nothing to do */
+
+ while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+ cpu_relax();
+
+ vpte = be64_to_cpu(hptep[0]);
+
+ ret = 0;
+ if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT))
+ /* Nothing to do */
+ goto out;
+
+ /* Unmap */
+ rev = &old->rev[idx];
+ guest_rpte = rev->guest_rpte;
+
+ ret = -EIO;
+ apsize = hpte_page_size(vpte, guest_rpte);
+ if (!apsize)
+ goto out;
+
+ if (vpte & HPTE_V_VALID) {
+ unsigned long gfn = hpte_rpn(guest_rpte, apsize);
+ int srcu_idx = srcu_read_lock(&kvm->srcu);
+ struct kvm_memory_slot *memslot =
+ __gfn_to_memslot(kvm_memslots(kvm), gfn);
+
+ if (memslot) {
+ unsigned long *rmapp;
+ rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
+
+ lock_rmap(rmapp);
+ kvmppc_unmap_hpte(kvm, idx, rmapp, gfn);
+ unlock_rmap(rmapp);
+ }
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ }
+
+ /* Reload PTE after unmap */
+ vpte = be64_to_cpu(hptep[0]);
+
+ BUG_ON(vpte & HPTE_V_VALID);
+ BUG_ON(!(vpte & HPTE_V_ABSENT));
+
+ ret = 0;
+ if (!(vpte & HPTE_V_BOLTED))
+ goto out;
+
+ rpte = be64_to_cpu(hptep[1]);
+ psize = hpte_base_page_size(vpte, rpte);
+ avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23);
+ pteg = idx / HPTES_PER_GROUP;
+ if (vpte & HPTE_V_SECONDARY)
+ pteg = ~pteg;
+
+ if (!(vpte & HPTE_V_1TB_SEG)) {
+ unsigned long offset, vsid;
+
+ /* We only have 28 - 23 bits of offset in avpn */
+ offset = (avpn & 0x1f) << 23;
+ vsid = avpn >> 5;
+ /* We can find more bits from the pteg value */
+ if (psize < (1ULL << 23))
+ offset |= ((vsid ^ pteg) & old_hash_mask) * psize;
+
+ hash = vsid ^ (offset / psize);
+ } else {
+ unsigned long offset, vsid;
+
+ /* We only have 40 - 23 bits of seg_off in avpn */
+ offset = (avpn & 0x1ffff) << 23;
+ vsid = avpn >> 17;
+ if (psize < (1ULL << 23))
+ offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) * psize;
+
+ hash = vsid ^ (vsid << 25) ^ (offset / psize);
+ }
+
+ new_pteg = hash & new_hash_mask;
+ if (vpte & HPTE_V_SECONDARY) {
+ BUG_ON(~pteg != (hash & old_hash_mask));
+ new_pteg = ~new_pteg;
+ } else {
+ BUG_ON(pteg != (hash & old_hash_mask));
+ }
+
+ new_idx = new_pteg * HPTES_PER_GROUP + (idx % HPTES_PER_GROUP);
+ new_hptep = (__be64 *)(new->virt + (new_idx << 4));
+
+ replace_vpte = be64_to_cpu(new_hptep[0]);
+
+ if (replace_vpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
+ BUG_ON(new->order >= old->order);
+
+ if (replace_vpte & HPTE_V_BOLTED) {
+ if (vpte & HPTE_V_BOLTED)
+ /* Bolted collision, nothing we can do */
+ ret = -ENOSPC;
+ /* Discard the new HPTE */
+ goto out;
+ }
+
+ /* Discard the previous HPTE */
+ }
+
+ new_hptep[1] = cpu_to_be64(rpte);
+ new->rev[new_idx].guest_rpte = guest_rpte;
+ /* No need for a barrier, since new HPT isn't active */
+ new_hptep[0] = cpu_to_be64(vpte);
+ unlock_hpte(new_hptep, vpte);
+
+out:
+ unlock_hpte(hptep, vpte);
+ return ret;
+}
+
+static int resize_hpt_rehash(struct kvm_resize_hpt *resize)
+{
+ struct kvm *kvm = resize->kvm;
+ unsigned long i;
+ int rc;
+
+ /*
+ * resize_hpt_rehash_hpte() doesn't handle the new-format HPTEs
+ * that POWER9 uses, and could well hit a BUG_ON on POWER9.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ return -EIO;
+ for (i = 0; i < kvmppc_hpt_npte(&kvm->arch.hpt); i++) {
+ rc = resize_hpt_rehash_hpte(resize, i);
+ if (rc != 0)
+ return rc;
+ }
+
+ return 0;
+}
+
+static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
+{
+ struct kvm *kvm = resize->kvm;
+ struct kvm_hpt_info hpt_tmp;
+
+ /* Exchange the pending tables in the resize structure with
+ * the active tables */
+
+ resize_hpt_debug(resize, "resize_hpt_pivot()\n");
+
+ spin_lock(&kvm->mmu_lock);
+ asm volatile("ptesync" : : : "memory");
+
+ hpt_tmp = kvm->arch.hpt;
+ kvmppc_set_hpt(kvm, &resize->hpt);
+ resize->hpt = hpt_tmp;
+
+ spin_unlock(&kvm->mmu_lock);
+
+ synchronize_srcu_expedited(&kvm->srcu);
+
+ resize_hpt_debug(resize, "resize_hpt_pivot() done\n");
+}
+
+static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
+{
+ BUG_ON(kvm->arch.resize_hpt != resize);
+
+ if (!resize)
+ return;
+
+ if (resize->hpt.virt)
+ kvmppc_free_hpt(&resize->hpt);
+
+ kvm->arch.resize_hpt = NULL;
+ kfree(resize);
+}
+
+static void resize_hpt_prepare_work(struct work_struct *work)
+{
+ struct kvm_resize_hpt *resize = container_of(work,
+ struct kvm_resize_hpt,
+ work);
+ struct kvm *kvm = resize->kvm;
+ int err;
+
+ resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
+ resize->order);
+
+ err = resize_hpt_allocate(resize);
+
+ mutex_lock(&kvm->lock);
+
+ resize->error = err;
+ resize->prepare_done = true;
+
+ mutex_unlock(&kvm->lock);
+}
+
+long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
+ struct kvm_ppc_resize_hpt *rhpt)
+{
+ unsigned long flags = rhpt->flags;
+ unsigned long shift = rhpt->shift;
+ struct kvm_resize_hpt *resize;
+ int ret;
+
+ if (flags != 0)
+ return -EINVAL;
+
+ if (shift && ((shift < 18) || (shift > 46)))
+ return -EINVAL;
+
+ mutex_lock(&kvm->lock);
+
+ resize = kvm->arch.resize_hpt;
+
+ if (resize) {
+ if (resize->order == shift) {
+ /* Suitable resize in progress */
+ if (resize->prepare_done) {
+ ret = resize->error;
+ if (ret != 0)
+ resize_hpt_release(kvm, resize);
+ } else {
+ ret = 100; /* estimated time in ms */
+ }
+
+ goto out;
+ }
+
+ /* not suitable, cancel it */
+ resize_hpt_release(kvm, resize);
+ }
+
+ ret = 0;
+ if (!shift)
+ goto out; /* nothing to do */
+
+ /* start new resize */
+
+ resize = kzalloc(sizeof(*resize), GFP_KERNEL);
+ resize->order = shift;
+ resize->kvm = kvm;
+ INIT_WORK(&resize->work, resize_hpt_prepare_work);
+ kvm->arch.resize_hpt = resize;
+
+ schedule_work(&resize->work);
+
+ ret = 100; /* estimated time in ms */
+
+out:
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
+static void resize_hpt_boot_vcpu(void *opaque)
+{
+ /* Nothing to do, just force a KVM exit */
+}
+
+long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
+ struct kvm_ppc_resize_hpt *rhpt)
+{
+ unsigned long flags = rhpt->flags;
+ unsigned long shift = rhpt->shift;
+ struct kvm_resize_hpt *resize;
+ long ret;
+
+ if (flags != 0)
+ return -EINVAL;
+
+ if (shift && ((shift < 18) || (shift > 46)))
+ return -EINVAL;
+
+ mutex_lock(&kvm->lock);
+
+ resize = kvm->arch.resize_hpt;
+
+ /* This shouldn't be possible */
+ ret = -EIO;
+ if (WARN_ON(!kvm->arch.hpte_setup_done))
+ goto out_no_hpt;
+
+ /* Stop VCPUs from running while we mess with the HPT */
+ kvm->arch.hpte_setup_done = 0;
+ smp_mb();
+
+ /* Boot all CPUs out of the guest so they re-read
+ * hpte_setup_done */
+ on_each_cpu(resize_hpt_boot_vcpu, NULL, 1);
+
+ ret = -ENXIO;
+ if (!resize || (resize->order != shift))
+ goto out;
+
+ ret = -EBUSY;
+ if (!resize->prepare_done)
+ goto out;
+
+ ret = resize->error;
+ if (ret != 0)
+ goto out;
+
+ ret = resize_hpt_rehash(resize);
+ if (ret != 0)
+ goto out;
+
+ resize_hpt_pivot(resize);
+
+out:
+ /* Let VCPUs run again */
+ kvm->arch.hpte_setup_done = 1;
+ smp_mb();
+out_no_hpt:
+ resize_hpt_release(kvm, resize);
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
+/*
* Functions for reading and writing the hash table via reads and
* writes on a file descriptor.
*
@@ -1311,8 +1705,8 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
flags = ctx->flags;
i = ctx->index;
- hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
- revp = kvm->arch.revmap + i;
+ hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
+ revp = kvm->arch.hpt.rev + i;
lbuf = (unsigned long __user *)buf;
nb = 0;
@@ -1327,7 +1721,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
/* Skip uninteresting entries, i.e. clean on not-first pass */
if (!first_pass) {
- while (i < kvm->arch.hpt_npte &&
+ while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
!hpte_dirty(revp, hptp)) {
++i;
hptp += 2;
@@ -1337,7 +1731,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
hdr.index = i;
/* Grab a series of valid entries */
- while (i < kvm->arch.hpt_npte &&
+ while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
hdr.n_valid < 0xffff &&
nb + HPTE_SIZE < count &&
record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
@@ -1353,7 +1747,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
++revp;
}
/* Now skip invalid entries while we can */
- while (i < kvm->arch.hpt_npte &&
+ while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
hdr.n_invalid < 0xffff &&
record_hpte(flags, hptp, hpte, revp, 0, first_pass)) {
/* found an invalid entry */
@@ -1374,7 +1768,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
}
/* Check if we've wrapped around the hash table */
- if (i >= kvm->arch.hpt_npte) {
+ if (i >= kvmppc_hpt_npte(&kvm->arch.hpt)) {
i = 0;
ctx->first_pass = 0;
break;
@@ -1433,11 +1827,11 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
err = -EINVAL;
i = hdr.index;
- if (i >= kvm->arch.hpt_npte ||
- i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte)
+ if (i >= kvmppc_hpt_npte(&kvm->arch.hpt) ||
+ i + hdr.n_valid + hdr.n_invalid > kvmppc_hpt_npte(&kvm->arch.hpt))
break;
- hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
+ hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
lbuf = (unsigned long __user *)buf;
for (j = 0; j < hdr.n_valid; ++j) {
__be64 hpte_v;
@@ -1624,8 +2018,9 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
kvm = p->kvm;
i = p->hpt_index;
- hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
- for (; len != 0 && i < kvm->arch.hpt_npte; ++i, hptp += 2) {
+ hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
+ for (; len != 0 && i < kvmppc_hpt_npte(&kvm->arch.hpt);
+ ++i, hptp += 2) {
if (!(be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)))
continue;
@@ -1635,7 +2030,7 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
cpu_relax();
v = be64_to_cpu(hptp[0]) & ~HPTE_V_HVLOCK;
hr = be64_to_cpu(hptp[1]);
- gr = kvm->arch.revmap[i].guest_rpte;
+ gr = kvm->arch.hpt.rev[i].guest_rpte;
unlock_hpte(hptp, v);
preempt_enable();
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index c379ff5a4438..491c5d8120f7 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -171,6 +171,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
goto fail;
}
+ ret = -ENOMEM;
stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
GFP_KERNEL);
if (!stt)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e4a79679342e..1e107ece4e37 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -182,7 +182,8 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
++vcpu->stat.halt_wakeup;
}
- if (kvmppc_ipi_thread(vcpu->arch.thread_cpu))
+ cpu = READ_ONCE(vcpu->arch.thread_cpu);
+ if (cpu >= 0 && kvmppc_ipi_thread(cpu))
return;
/* CPU points to the first thread of the core */
@@ -773,12 +774,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
}
tvcpu->arch.prodded = 1;
smp_mb();
- if (vcpu->arch.ceded) {
- if (swait_active(&vcpu->wq)) {
- swake_up(&vcpu->wq);
- vcpu->stat.halt_wakeup++;
- }
- }
+ if (tvcpu->arch.ceded)
+ kvmppc_fast_vcpu_kick_hv(tvcpu);
break;
case H_CONFER:
target = kvmppc_get_gpr(vcpu, 4);
@@ -2665,7 +2662,8 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
int i;
for_each_runnable_thread(i, vcpu, vc) {
- if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded)
+ if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded ||
+ vcpu->arch.prodded)
return 1;
}
@@ -2851,7 +2849,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
break;
n_ceded = 0;
for_each_runnable_thread(i, v, vc) {
- if (!v->arch.pending_exceptions)
+ if (!v->arch.pending_exceptions && !v->arch.prodded)
n_ceded += v->arch.ceded;
else
v->arch.ceded = 0;
@@ -3199,12 +3197,23 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
goto out; /* another vcpu beat us to it */
/* Allocate hashed page table (if not done already) and reset it */
- if (!kvm->arch.hpt_virt) {
- err = kvmppc_alloc_hpt(kvm, NULL);
- if (err) {
+ if (!kvm->arch.hpt.virt) {
+ int order = KVM_DEFAULT_HPT_ORDER;
+ struct kvm_hpt_info info;
+
+ err = kvmppc_allocate_hpt(&info, order);
+ /* If we get here, it means userspace didn't specify a
+ * size explicitly. So, try successively smaller
+ * sizes if the default failed. */
+ while ((err == -ENOMEM) && --order >= PPC_MIN_HPT_ORDER)
+ err = kvmppc_allocate_hpt(&info, order);
+
+ if (err < 0) {
pr_err("KVM: Couldn't alloc HPT\n");
goto out;
}
+
+ kvmppc_set_hpt(kvm, &info);
}
/* Look up the memslot for guest physical address 0 */
@@ -3413,6 +3422,9 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
kvm->arch.lpcr = lpcr;
+ /* Initialization for future HPT resizes */
+ kvm->arch.resize_hpt = NULL;
+
/*
* Work out how many sets the TLB has, for the use of
* the TLB invalidation loop in book3s_hv_rmhandlers.S.
@@ -3469,7 +3481,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
if (kvm_is_radix(kvm))
kvmppc_free_radix(kvm);
else
- kvmppc_free_hpt(kvm);
+ kvmppc_free_hpt(&kvm->arch.hpt);
kvmppc_free_pimap(kvm);
}
@@ -3695,12 +3707,9 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
r = -EFAULT;
if (get_user(htab_order, (u32 __user *)argp))
break;
- r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
+ r = kvmppc_alloc_reset_hpt(kvm, htab_order);
if (r)
break;
- r = -EFAULT;
- if (put_user(htab_order, (u32 __user *)argp))
- break;
r = 0;
break;
}
@@ -3715,6 +3724,28 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
break;
}
+ case KVM_PPC_RESIZE_HPT_PREPARE: {
+ struct kvm_ppc_resize_hpt rhpt;
+
+ r = -EFAULT;
+ if (copy_from_user(&rhpt, argp, sizeof(rhpt)))
+ break;
+
+ r = kvm_vm_ioctl_resize_hpt_prepare(kvm, &rhpt);
+ break;
+ }
+
+ case KVM_PPC_RESIZE_HPT_COMMIT: {
+ struct kvm_ppc_resize_hpt rhpt;
+
+ r = -EFAULT;
+ if (copy_from_user(&rhpt, argp, sizeof(rhpt)))
+ break;
+
+ r = kvm_vm_ioctl_resize_hpt_commit(kvm, &rhpt);
+ break;
+ }
+
default:
r = -ENOTTY;
}
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 2f69fbc19bb0..c42a7e63b39e 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -52,19 +52,19 @@ static int __init early_parse_kvm_cma_resv(char *p)
}
early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
-struct page *kvm_alloc_hpt(unsigned long nr_pages)
+struct page *kvm_alloc_hpt_cma(unsigned long nr_pages)
{
VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES));
}
-EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
+EXPORT_SYMBOL_GPL(kvm_alloc_hpt_cma);
-void kvm_release_hpt(struct page *page, unsigned long nr_pages)
+void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages)
{
cma_release(kvm_cma, page, nr_pages);
}
-EXPORT_SYMBOL_GPL(kvm_release_hpt);
+EXPORT_SYMBOL_GPL(kvm_free_hpt_cma);
/**
* kvm_cma_reserve() - reserve area for kvm hash pagetable
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index b095afcd4309..6fca970373ee 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -86,10 +86,10 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
if (*rmap & KVMPPC_RMAP_PRESENT) {
i = *rmap & KVMPPC_RMAP_INDEX;
- head = &kvm->arch.revmap[i];
+ head = &kvm->arch.hpt.rev[i];
if (realmode)
head = real_vmalloc_addr(head);
- tail = &kvm->arch.revmap[head->back];
+ tail = &kvm->arch.hpt.rev[head->back];
if (realmode)
tail = real_vmalloc_addr(tail);
rev->forw = i;
@@ -154,8 +154,8 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
lock_rmap(rmap);
head = *rmap & KVMPPC_RMAP_INDEX;
- next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]);
- prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]);
+ next = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->forw]);
+ prev = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->back]);
next->back = rev->back;
prev->forw = rev->forw;
if (head == pte_index) {
@@ -292,11 +292,11 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
/* Find and lock the HPTEG slot to use */
do_insert:
- if (pte_index >= kvm->arch.hpt_npte)
+ if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
return H_PARAMETER;
if (likely((flags & H_EXACT) == 0)) {
pte_index &= ~7UL;
- hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+ hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
for (i = 0; i < 8; ++i) {
if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 &&
try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
@@ -327,7 +327,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
}
pte_index += i;
} else {
- hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+ hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
HPTE_V_ABSENT)) {
/* Lock the slot and check again */
@@ -344,7 +344,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
}
/* Save away the guest's idea of the second HPTE dword */
- rev = &kvm->arch.revmap[pte_index];
+ rev = &kvm->arch.hpt.rev[pte_index];
if (realmode)
rev = real_vmalloc_addr(rev);
if (rev) {
@@ -469,9 +469,9 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
if (kvm_is_radix(kvm))
return H_FUNCTION;
- if (pte_index >= kvm->arch.hpt_npte)
+ if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
return H_PARAMETER;
- hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+ hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
cpu_relax();
pte = orig_pte = be64_to_cpu(hpte[0]);
@@ -487,7 +487,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
return H_NOT_FOUND;
}
- rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
v = pte & ~HPTE_V_HVLOCK;
if (v & HPTE_V_VALID) {
hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
@@ -557,13 +557,13 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
break;
}
if (req != 1 || flags == 3 ||
- pte_index >= kvm->arch.hpt_npte) {
+ pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) {
/* parameter error */
args[j] = ((0xa0 | flags) << 56) + pte_index;
ret = H_PARAMETER;
break;
}
- hp = (__be64 *) (kvm->arch.hpt_virt + (pte_index << 4));
+ hp = (__be64 *) (kvm->arch.hpt.virt + (pte_index << 4));
/* to avoid deadlock, don't spin except for first */
if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
if (n)
@@ -600,7 +600,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
}
args[j] = ((0x80 | flags) << 56) + pte_index;
- rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
note_hpte_modification(kvm, rev);
if (!(hp0 & HPTE_V_VALID)) {
@@ -657,10 +657,10 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
if (kvm_is_radix(kvm))
return H_FUNCTION;
- if (pte_index >= kvm->arch.hpt_npte)
+ if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
return H_PARAMETER;
- hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+ hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
cpu_relax();
v = pte_v = be64_to_cpu(hpte[0]);
@@ -680,7 +680,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
/* Update guest view of 2nd HPTE dword */
mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
HPTE_R_KEY_HI | HPTE_R_KEY_LO;
- rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
if (rev) {
r = (rev->guest_rpte & ~mask) | bits;
rev->guest_rpte = r;
@@ -728,15 +728,15 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
if (kvm_is_radix(kvm))
return H_FUNCTION;
- if (pte_index >= kvm->arch.hpt_npte)
+ if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
return H_PARAMETER;
if (flags & H_READ_4) {
pte_index &= ~3;
n = 4;
}
- rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
for (i = 0; i < n; ++i, ++pte_index) {
- hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+ hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
r = be64_to_cpu(hpte[1]);
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
@@ -769,11 +769,11 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
if (kvm_is_radix(kvm))
return H_FUNCTION;
- if (pte_index >= kvm->arch.hpt_npte)
+ if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
return H_PARAMETER;
- rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
- hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+ rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
+ hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
cpu_relax();
v = be64_to_cpu(hpte[0]);
@@ -817,11 +817,11 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
if (kvm_is_radix(kvm))
return H_FUNCTION;
- if (pte_index >= kvm->arch.hpt_npte)
+ if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
return H_PARAMETER;
- rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
- hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+ rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
+ hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
cpu_relax();
v = be64_to_cpu(hpte[0]);
@@ -970,7 +970,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
somask = (1UL << 28) - 1;
vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
}
- hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask;
+ hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvmppc_hpt_mask(&kvm->arch.hpt);
avpn = slb_v & ~(somask >> 16); /* also includes B */
avpn |= (eaddr & somask) >> 16;
@@ -981,7 +981,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
val |= avpn;
for (;;) {
- hpte = (__be64 *)(kvm->arch.hpt_virt + (hash << 7));
+ hpte = (__be64 *)(kvm->arch.hpt.virt + (hash << 7));
for (i = 0; i < 16; i += 2) {
/* Read the PTE racily */
@@ -1017,7 +1017,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
if (val & HPTE_V_SECONDARY)
break;
val |= HPTE_V_SECONDARY;
- hash = hash ^ kvm->arch.hpt_mask;
+ hash = hash ^ kvmppc_hpt_mask(&kvm->arch.hpt);
}
return -1;
}
@@ -1066,14 +1066,14 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
return status; /* there really was no HPTE */
return 0; /* for prot fault, HPTE disappeared */
}
- hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
+ hpte = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
r = be64_to_cpu(hpte[1]);
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
v = hpte_new_to_old_v(v, r);
r = hpte_new_to_old_r(r);
}
- rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
+ rev = real_vmalloc_addr(&kvm->arch.hpt.rev[index]);
gr = rev->guest_rpte;
unlock_hpte(hpte, orig_v);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 29f43ed6d5eb..e78542d99cd6 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -35,7 +35,7 @@ int kvm_irq_bypass = 1;
EXPORT_SYMBOL(kvm_irq_bypass);
static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
- u32 new_irq);
+ u32 new_irq, bool check_resend);
static int xics_opal_set_server(unsigned int hw_irq, int server_cpu);
/* -- ICS routines -- */
@@ -44,20 +44,12 @@ static void ics_rm_check_resend(struct kvmppc_xics *xics,
{
int i;
- arch_spin_lock(&ics->lock);
-
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
struct ics_irq_state *state = &ics->irq_state[i];
-
- if (!state->resend)
- continue;
-
- arch_spin_unlock(&ics->lock);
- icp_rm_deliver_irq(xics, icp, state->number);
- arch_spin_lock(&ics->lock);
+ if (state->resend)
+ icp_rm_deliver_irq(xics, icp, state->number, true);
}
- arch_spin_unlock(&ics->lock);
}
/* -- ICP routines -- */
@@ -288,7 +280,7 @@ static bool icp_rm_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
}
static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
- u32 new_irq)
+ u32 new_irq, bool check_resend)
{
struct ics_irq_state *state;
struct kvmppc_ics *ics;
@@ -333,6 +325,10 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
}
}
+ if (check_resend)
+ if (!state->resend)
+ goto out;
+
/* Clear the resend bit of that interrupt */
state->resend = 0;
@@ -378,7 +374,9 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
*/
if (reject && reject != XICS_IPI) {
arch_spin_unlock(&ics->lock);
+ icp->n_reject++;
new_irq = reject;
+ check_resend = 0;
goto again;
}
} else {
@@ -386,10 +384,16 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
* We failed to deliver the interrupt we need to set the
* resend map bit and mark the ICS state as needing a resend
*/
- set_bit(ics->icsid, icp->resend_map);
state->resend = 1;
/*
+ * Make sure when checking resend, we don't miss the resend
+ * if resend_map bit is seen and cleared.
+ */
+ smp_wmb();
+ set_bit(ics->icsid, icp->resend_map);
+
+ /*
* If the need_resend flag got cleared in the ICP some time
* between icp_rm_try_to_deliver() atomic update and now, then
* we know it might have missed the resend_map bit. So we
@@ -397,7 +401,9 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
*/
smp_mb();
if (!icp->state.need_resend) {
+ state->resend = 0;
arch_spin_unlock(&ics->lock);
+ check_resend = 0;
goto again;
}
}
@@ -592,7 +598,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
/* Handle reject in real mode */
if (reject && reject != XICS_IPI) {
this_icp->n_reject++;
- icp_rm_deliver_irq(xics, icp, reject);
+ icp_rm_deliver_irq(xics, icp, reject, false);
}
/* Handle resends in real mode */
@@ -660,59 +666,45 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
*/
if (reject && reject != XICS_IPI) {
icp->n_reject++;
- icp_rm_deliver_irq(xics, icp, reject);
+ icp_rm_deliver_irq(xics, icp, reject, false);
}
bail:
return check_too_hard(xics, icp);
}
-int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+static int ics_rm_eoi(struct kvm_vcpu *vcpu, u32 irq)
{
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp = vcpu->arch.icp;
struct kvmppc_ics *ics;
struct ics_irq_state *state;
- u32 irq = xirr & 0x00ffffff;
u16 src;
-
- if (!xics || !xics->real_mode)
- return H_TOO_HARD;
+ u32 pq_old, pq_new;
/*
- * ICP State: EOI
+ * ICS EOI handling: For LSI, if P bit is still set, we need to
+ * resend it.
*
- * Note: If EOI is incorrectly used by SW to lower the CPPR
- * value (ie more favored), we do not check for rejection of
- * a pending interrupt, this is a SW error and PAPR sepcifies
- * that we don't have to deal with it.
- *
- * The sending of an EOI to the ICS is handled after the
- * CPPR update
- *
- * ICP State: Down_CPPR which we handle
- * in a separate function as it's shared with H_CPPR.
+ * For MSI, we move Q bit into P (and clear Q). If it is set,
+ * resend it.
*/
- icp_rm_down_cppr(xics, icp, xirr >> 24);
- /* IPIs have no EOI */
- if (irq == XICS_IPI)
- goto bail;
- /*
- * EOI handling: If the interrupt is still asserted, we need to
- * resend it. We can take a lockless "peek" at the ICS state here.
- *
- * "Message" interrupts will never have "asserted" set
- */
ics = kvmppc_xics_find_ics(xics, irq, &src);
if (!ics)
goto bail;
+
state = &ics->irq_state[src];
- /* Still asserted, resend it */
- if (state->asserted) {
- icp->n_reject++;
- icp_rm_deliver_irq(xics, icp, irq);
- }
+ if (state->lsi)
+ pq_new = state->pq_state;
+ else
+ do {
+ pq_old = state->pq_state;
+ pq_new = pq_old >> 1;
+ } while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
+
+ if (pq_new & PQ_PRESENTED)
+ icp_rm_deliver_irq(xics, NULL, irq, false);
if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) {
icp->rm_action |= XICS_RM_NOTIFY_EOI;
@@ -733,10 +725,43 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
state->intr_cpu = -1;
}
}
+
bail:
return check_too_hard(xics, icp);
}
+int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u32 irq = xirr & 0x00ffffff;
+
+ if (!xics || !xics->real_mode)
+ return H_TOO_HARD;
+
+ /*
+ * ICP State: EOI
+ *
+ * Note: If EOI is incorrectly used by SW to lower the CPPR
+ * value (ie more favored), we do not check for rejection of
+ * a pending interrupt, this is a SW error and PAPR specifies
+ * that we don't have to deal with it.
+ *
+ * The sending of an EOI to the ICS is handled after the
+ * CPPR update
+ *
+ * ICP State: Down_CPPR which we handle
+ * in a separate function as it's shared with H_CPPR.
+ */
+ icp_rm_down_cppr(xics, icp, xirr >> 24);
+
+ /* IPIs have no EOI */
+ if (irq == XICS_IPI)
+ return check_too_hard(xics, icp);
+
+ return ics_rm_eoi(vcpu, irq);
+}
+
unsigned long eoi_rc;
static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
@@ -823,14 +848,33 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
{
struct kvmppc_xics *xics;
struct kvmppc_icp *icp;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
u32 irq;
+ u16 src;
+ u32 pq_old, pq_new;
irq = irq_map->v_hwirq;
xics = vcpu->kvm->arch.xics;
icp = vcpu->arch.icp;
kvmppc_rm_handle_irq_desc(irq_map->desc);
- icp_rm_deliver_irq(xics, icp, irq);
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics)
+ return 2;
+
+ state = &ics->irq_state[src];
+
+ /* only MSIs register bypass producers, so it must be MSI here */
+ do {
+ pq_old = state->pq_state;
+ pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED;
+ } while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
+
+ /* Test P=1, Q=0, this is the only case where we present */
+ if (pq_new == PQ_PRESENTED)
+ icp_rm_deliver_irq(xics, icp, irq, false);
/* EOI the interrupt */
icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr,
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 1482961ceb4d..d4dfc0ca2a44 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -902,6 +902,69 @@ static void kvmppc_clear_debug(struct kvm_vcpu *vcpu)
}
}
+static int kvmppc_exit_pr_progint(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int exit_nr)
+{
+ enum emulation_result er;
+ ulong flags;
+ u32 last_inst;
+ int emul, r;
+
+ /*
+ * shadow_srr1 only contains valid flags if we came here via a program
+ * exception. The other exceptions (emulation assist, FP unavailable,
+ * etc.) do not provide flags in SRR1, so use an illegal-instruction
+ * exception when injecting a program interrupt into the guest.
+ */
+ if (exit_nr == BOOK3S_INTERRUPT_PROGRAM)
+ flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
+ else
+ flags = SRR1_PROGILL;
+
+ emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
+ if (emul != EMULATE_DONE)
+ return RESUME_GUEST;
+
+ if (kvmppc_get_msr(vcpu) & MSR_PR) {
+#ifdef EXIT_DEBUG
+ pr_info("Userspace triggered 0x700 exception at\n 0x%lx (0x%x)\n",
+ kvmppc_get_pc(vcpu), last_inst);
+#endif
+ if ((last_inst & 0xff0007ff) != (INS_DCBZ & 0xfffffff7)) {
+ kvmppc_core_queue_program(vcpu, flags);
+ return RESUME_GUEST;
+ }
+ }
+
+ vcpu->stat.emulated_inst_exits++;
+ er = kvmppc_emulate_instruction(run, vcpu);
+ switch (er) {
+ case EMULATE_DONE:
+ r = RESUME_GUEST_NV;
+ break;
+ case EMULATE_AGAIN:
+ r = RESUME_GUEST;
+ break;
+ case EMULATE_FAIL:
+ pr_crit("%s: emulation at %lx failed (%08x)\n",
+ __func__, kvmppc_get_pc(vcpu), last_inst);
+ kvmppc_core_queue_program(vcpu, flags);
+ r = RESUME_GUEST;
+ break;
+ case EMULATE_DO_MMIO:
+ run->exit_reason = KVM_EXIT_MMIO;
+ r = RESUME_HOST_NV;
+ break;
+ case EMULATE_EXIT_USER:
+ r = RESUME_HOST_NV;
+ break;
+ default:
+ BUG();
+ }
+
+ return r;
+}
+
int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int exit_nr)
{
@@ -1044,71 +1107,8 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case BOOK3S_INTERRUPT_PROGRAM:
case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
- {
- enum emulation_result er;
- ulong flags;
- u32 last_inst;
- int emul;
-
-program_interrupt:
- /*
- * shadow_srr1 only contains valid flags if we came here via
- * a program exception. The other exceptions (emulation assist,
- * FP unavailable, etc.) do not provide flags in SRR1, so use
- * an illegal-instruction exception when injecting a program
- * interrupt into the guest.
- */
- if (exit_nr == BOOK3S_INTERRUPT_PROGRAM)
- flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
- else
- flags = SRR1_PROGILL;
-
- emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
- if (emul != EMULATE_DONE) {
- r = RESUME_GUEST;
- break;
- }
-
- if (kvmppc_get_msr(vcpu) & MSR_PR) {
-#ifdef EXIT_DEBUG
- pr_info("Userspace triggered 0x700 exception at\n 0x%lx (0x%x)\n",
- kvmppc_get_pc(vcpu), last_inst);
-#endif
- if ((last_inst & 0xff0007ff) !=
- (INS_DCBZ & 0xfffffff7)) {
- kvmppc_core_queue_program(vcpu, flags);
- r = RESUME_GUEST;
- break;
- }
- }
-
- vcpu->stat.emulated_inst_exits++;
- er = kvmppc_emulate_instruction(run, vcpu);
- switch (er) {
- case EMULATE_DONE:
- r = RESUME_GUEST_NV;
- break;
- case EMULATE_AGAIN:
- r = RESUME_GUEST;
- break;
- case EMULATE_FAIL:
- printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
- __func__, kvmppc_get_pc(vcpu), last_inst);
- kvmppc_core_queue_program(vcpu, flags);
- r = RESUME_GUEST;
- break;
- case EMULATE_DO_MMIO:
- run->exit_reason = KVM_EXIT_MMIO;
- r = RESUME_HOST_NV;
- break;
- case EMULATE_EXIT_USER:
- r = RESUME_HOST_NV;
- break;
- default:
- BUG();
- }
+ r = kvmppc_exit_pr_progint(run, vcpu, exit_nr);
break;
- }
case BOOK3S_INTERRUPT_SYSCALL:
{
u32 last_sc;
@@ -1185,7 +1185,7 @@ program_interrupt:
emul = kvmppc_get_last_inst(vcpu, INST_GENERIC,
&last_inst);
if (emul == EMULATE_DONE)
- goto program_interrupt;
+ r = kvmppc_exit_pr_progint(run, vcpu, exit_nr);
else
r = RESUME_GUEST;
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 20dff102a06f..e48803e2918d 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -63,7 +63,7 @@
/* -- ICS routines -- */
static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
- u32 new_irq);
+ u32 new_irq, bool check_resend);
/*
* Return value ideally indicates how the interrupt was handled, but no
@@ -75,6 +75,7 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
struct ics_irq_state *state;
struct kvmppc_ics *ics;
u16 src;
+ u32 pq_old, pq_new;
XICS_DBG("ics deliver %#x (level: %d)\n", irq, level);
@@ -87,25 +88,41 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
if (!state->exists)
return -EINVAL;
+ if (level == KVM_INTERRUPT_SET_LEVEL || level == KVM_INTERRUPT_SET)
+ level = 1;
+ else if (level == KVM_INTERRUPT_UNSET)
+ level = 0;
/*
- * We set state->asserted locklessly. This should be fine as
- * we are the only setter, thus concurrent access is undefined
- * to begin with.
+ * Take other values the same as 1, consistent with original code.
+ * maybe WARN here?
*/
- if ((level == 1 && state->lsi) || level == KVM_INTERRUPT_SET_LEVEL)
- state->asserted = 1;
- else if (level == 0 || level == KVM_INTERRUPT_UNSET) {
- state->asserted = 0;
+
+ if (!state->lsi && level == 0) /* noop for MSI */
return 0;
- }
+
+ do {
+ pq_old = state->pq_state;
+ if (state->lsi) {
+ if (level) {
+ if (pq_old & PQ_PRESENTED)
+ /* Setting already set LSI ... */
+ return 0;
+
+ pq_new = PQ_PRESENTED;
+ } else
+ pq_new = 0;
+ } else
+ pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED;
+ } while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
+
+ /* Test P=1, Q=0, this is the only case where we present */
+ if (pq_new == PQ_PRESENTED)
+ icp_deliver_irq(xics, NULL, irq, false);
/* Record which CPU this arrived on for passed-through interrupts */
if (state->host_irq)
state->intr_cpu = raw_smp_processor_id();
- /* Attempt delivery */
- icp_deliver_irq(xics, NULL, irq);
-
return 0;
}
@@ -114,29 +131,14 @@ static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
{
int i;
- unsigned long flags;
-
- local_irq_save(flags);
- arch_spin_lock(&ics->lock);
-
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
struct ics_irq_state *state = &ics->irq_state[i];
-
- if (!state->resend)
- continue;
-
- XICS_DBG("resend %#x prio %#x\n", state->number,
- state->priority);
-
- arch_spin_unlock(&ics->lock);
- local_irq_restore(flags);
- icp_deliver_irq(xics, icp, state->number);
- local_irq_save(flags);
- arch_spin_lock(&ics->lock);
+ if (state->resend) {
+ XICS_DBG("resend %#x prio %#x\n", state->number,
+ state->priority);
+ icp_deliver_irq(xics, icp, state->number, true);
+ }
}
-
- arch_spin_unlock(&ics->lock);
- local_irq_restore(flags);
}
static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
@@ -155,6 +157,7 @@ static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
deliver = false;
if ((state->masked_pending || state->resend) && priority != MASKED) {
state->masked_pending = 0;
+ state->resend = 0;
deliver = true;
}
@@ -189,7 +192,7 @@ int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
state->masked_pending, state->resend);
if (write_xive(xics, ics, state, server, priority, priority))
- icp_deliver_irq(xics, icp, irq);
+ icp_deliver_irq(xics, icp, irq, false);
return 0;
}
@@ -242,7 +245,7 @@ int kvmppc_xics_int_on(struct kvm *kvm, u32 irq)
if (write_xive(xics, ics, state, state->server, state->saved_priority,
state->saved_priority))
- icp_deliver_irq(xics, icp, irq);
+ icp_deliver_irq(xics, icp, irq, false);
return 0;
}
@@ -376,7 +379,7 @@ static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
}
static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
- u32 new_irq)
+ u32 new_irq, bool check_resend)
{
struct ics_irq_state *state;
struct kvmppc_ics *ics;
@@ -422,6 +425,10 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
}
}
+ if (check_resend)
+ if (!state->resend)
+ goto out;
+
/* Clear the resend bit of that interrupt */
state->resend = 0;
@@ -470,6 +477,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
arch_spin_unlock(&ics->lock);
local_irq_restore(flags);
new_irq = reject;
+ check_resend = 0;
goto again;
}
} else {
@@ -477,10 +485,16 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
* We failed to deliver the interrupt we need to set the
* resend map bit and mark the ICS state as needing a resend
*/
- set_bit(ics->icsid, icp->resend_map);
state->resend = 1;
/*
+ * Make sure when checking resend, we don't miss the resend
+ * if resend_map bit is seen and cleared.
+ */
+ smp_wmb();
+ set_bit(ics->icsid, icp->resend_map);
+
+ /*
* If the need_resend flag got cleared in the ICP some time
* between icp_try_to_deliver() atomic update and now, then
* we know it might have missed the resend_map bit. So we
@@ -488,8 +502,10 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
*/
smp_mb();
if (!icp->state.need_resend) {
+ state->resend = 0;
arch_spin_unlock(&ics->lock);
local_irq_restore(flags);
+ check_resend = 0;
goto again;
}
}
@@ -681,7 +697,7 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
/* Handle reject */
if (reject && reject != XICS_IPI)
- icp_deliver_irq(xics, icp, reject);
+ icp_deliver_irq(xics, icp, reject, false);
/* Handle resend */
if (resend)
@@ -761,17 +777,54 @@ static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
* attempt (see comments in icp_deliver_irq).
*/
if (reject && reject != XICS_IPI)
- icp_deliver_irq(xics, icp, reject);
+ icp_deliver_irq(xics, icp, reject, false);
}
-static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+static int ics_eoi(struct kvm_vcpu *vcpu, u32 irq)
{
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp = vcpu->arch.icp;
struct kvmppc_ics *ics;
struct ics_irq_state *state;
- u32 irq = xirr & 0x00ffffff;
u16 src;
+ u32 pq_old, pq_new;
+
+ /*
+ * ICS EOI handling: For LSI, if P bit is still set, we need to
+ * resend it.
+ *
+ * For MSI, we move Q bit into P (and clear Q). If it is set,
+ * resend it.
+ */
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics) {
+ XICS_DBG("ios_eoi: IRQ 0x%06x not found !\n", irq);
+ return H_PARAMETER;
+ }
+ state = &ics->irq_state[src];
+
+ if (state->lsi)
+ pq_new = state->pq_state;
+ else
+ do {
+ pq_old = state->pq_state;
+ pq_new = pq_old >> 1;
+ } while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
+
+ if (pq_new & PQ_PRESENTED)
+ icp_deliver_irq(xics, icp, irq, false);
+
+ kvm_notify_acked_irq(vcpu->kvm, 0, irq);
+
+ return H_SUCCESS;
+}
+
+static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u32 irq = xirr & 0x00ffffff;
XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
@@ -794,26 +847,8 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
/* IPIs have no EOI */
if (irq == XICS_IPI)
return H_SUCCESS;
- /*
- * EOI handling: If the interrupt is still asserted, we need to
- * resend it. We can take a lockless "peek" at the ICS state here.
- *
- * "Message" interrupts will never have "asserted" set
- */
- ics = kvmppc_xics_find_ics(xics, irq, &src);
- if (!ics) {
- XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq);
- return H_PARAMETER;
- }
- state = &ics->irq_state[src];
- /* Still asserted, resend it */
- if (state->asserted)
- icp_deliver_irq(xics, icp, irq);
-
- kvm_notify_acked_irq(vcpu->kvm, 0, irq);
-
- return H_SUCCESS;
+ return ics_eoi(vcpu, irq);
}
int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
@@ -832,10 +867,6 @@ int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
icp->n_rm_check_resend++;
icp_check_resend(xics, icp->rm_resend_icp);
}
- if (icp->rm_action & XICS_RM_REJECT) {
- icp->n_rm_reject++;
- icp_deliver_irq(xics, icp, icp->rm_reject);
- }
if (icp->rm_action & XICS_RM_NOTIFY_EOI) {
icp->n_rm_notify_eoi++;
kvm_notify_acked_irq(vcpu->kvm, 0, icp->rm_eoied_irq);
@@ -920,7 +951,7 @@ static int xics_debug_show(struct seq_file *m, void *private)
int icsid, i;
unsigned long flags;
unsigned long t_rm_kick_vcpu, t_rm_check_resend;
- unsigned long t_rm_reject, t_rm_notify_eoi;
+ unsigned long t_rm_notify_eoi;
unsigned long t_reject, t_check_resend;
if (!kvm)
@@ -929,7 +960,6 @@ static int xics_debug_show(struct seq_file *m, void *private)
t_rm_kick_vcpu = 0;
t_rm_notify_eoi = 0;
t_rm_check_resend = 0;
- t_rm_reject = 0;
t_check_resend = 0;
t_reject = 0;
@@ -952,14 +982,13 @@ static int xics_debug_show(struct seq_file *m, void *private)
t_rm_kick_vcpu += icp->n_rm_kick_vcpu;
t_rm_notify_eoi += icp->n_rm_notify_eoi;
t_rm_check_resend += icp->n_rm_check_resend;
- t_rm_reject += icp->n_rm_reject;
t_check_resend += icp->n_check_resend;
t_reject += icp->n_reject;
}
- seq_printf(m, "ICP Guest->Host totals: kick_vcpu=%lu check_resend=%lu reject=%lu notify_eoi=%lu\n",
+ seq_printf(m, "ICP Guest->Host totals: kick_vcpu=%lu check_resend=%lu notify_eoi=%lu\n",
t_rm_kick_vcpu, t_rm_check_resend,
- t_rm_reject, t_rm_notify_eoi);
+ t_rm_notify_eoi);
seq_printf(m, "ICP Real Mode totals: check_resend=%lu resend=%lu\n",
t_check_resend, t_reject);
for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
@@ -977,9 +1006,9 @@ static int xics_debug_show(struct seq_file *m, void *private)
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
struct ics_irq_state *irq = &ics->irq_state[i];
- seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n",
+ seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x pq_state %d resend %d masked pending %d\n",
irq->number, irq->server, irq->priority,
- irq->saved_priority, irq->asserted,
+ irq->saved_priority, irq->pq_state,
irq->resend, irq->masked_pending);
}
@@ -1198,10 +1227,17 @@ static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
val |= prio << KVM_XICS_PRIORITY_SHIFT;
if (irqp->lsi) {
val |= KVM_XICS_LEVEL_SENSITIVE;
- if (irqp->asserted)
+ if (irqp->pq_state & PQ_PRESENTED)
val |= KVM_XICS_PENDING;
} else if (irqp->masked_pending || irqp->resend)
val |= KVM_XICS_PENDING;
+
+ if (irqp->pq_state & PQ_PRESENTED)
+ val |= KVM_XICS_PRESENTED;
+
+ if (irqp->pq_state & PQ_QUEUED)
+ val |= KVM_XICS_QUEUED;
+
ret = 0;
}
arch_spin_unlock(&ics->lock);
@@ -1253,18 +1289,20 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
irqp->resend = 0;
irqp->masked_pending = 0;
irqp->lsi = 0;
- irqp->asserted = 0;
- if (val & KVM_XICS_LEVEL_SENSITIVE) {
+ irqp->pq_state = 0;
+ if (val & KVM_XICS_LEVEL_SENSITIVE)
irqp->lsi = 1;
- if (val & KVM_XICS_PENDING)
- irqp->asserted = 1;
- }
+ /* If PENDING, set P in case P is not saved because of old code */
+ if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
+ irqp->pq_state |= PQ_PRESENTED;
+ if (val & KVM_XICS_QUEUED)
+ irqp->pq_state |= PQ_QUEUED;
irqp->exists = 1;
arch_spin_unlock(&ics->lock);
local_irq_restore(flags);
if (val & KVM_XICS_PENDING)
- icp_deliver_irq(xics, NULL, irqp->number);
+ icp_deliver_irq(xics, NULL, irqp->number, false);
return 0;
}
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index 2a50320b55ca..ec5474cf70c6 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -31,16 +31,19 @@
/* Priority value to use for disabling an interrupt */
#define MASKED 0xff
+#define PQ_PRESENTED 1
+#define PQ_QUEUED 2
+
/* State for one irq source */
struct ics_irq_state {
u32 number;
u32 server;
+ u32 pq_state;
u8 priority;
u8 saved_priority;
u8 resend;
u8 masked_pending;
u8 lsi; /* level-sensitive interrupt */
- u8 asserted; /* Only for LSI */
u8 exists;
int intr_cpu;
u32 host_irq;
@@ -73,7 +76,6 @@ struct kvmppc_icp {
*/
#define XICS_RM_KICK_VCPU 0x1
#define XICS_RM_CHECK_RESEND 0x2
-#define XICS_RM_REJECT 0x4
#define XICS_RM_NOTIFY_EOI 0x8
u32 rm_action;
struct kvm_vcpu *rm_kick_target;
@@ -84,7 +86,6 @@ struct kvmppc_icp {
/* Counters for each reason we exited real mode */
unsigned long n_rm_kick_vcpu;
unsigned long n_rm_check_resend;
- unsigned long n_rm_reject;
unsigned long n_rm_notify_eoi;
/* Counters for handling ICP processing in real mode */
unsigned long n_check_resend;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 40a5b2d75ed1..2b38d824e9e5 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -511,6 +511,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ONE_REG:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
+ case KVM_CAP_IMMEDIATE_EXIT:
r = 1;
break;
case KVM_CAP_PPC_PAIRED_SINGLES:
@@ -612,6 +613,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SPAPR_MULTITCE:
r = 1;
break;
+ case KVM_CAP_SPAPR_RESIZE_HPT:
+ /* Disable this on POWER9 until code handles new HPTE format */
+ r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300);
+ break;
#endif
case KVM_CAP_PPC_HTM:
r = cpu_has_feature(CPU_FTR_TM_COMP) &&
@@ -1114,7 +1119,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
#endif
}
- r = kvmppc_vcpu_run(run, vcpu);
+ if (run->immediate_exit)
+ r = -EINTR;
+ else
+ r = kvmppc_vcpu_run(run, vcpu);
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);

Privacy Policy