Merge branch 'topic/ppc-kvm' into next
author Michael Ellerman <mpe@ellerman.id.au>
Thu, 19 May 2022 13:10:42 +0000 (23:10 +1000)
committer Michael Ellerman <mpe@ellerman.id.au>
Thu, 19 May 2022 13:10:42 +0000 (23:10 +1000)
Merge our KVM topic branch.

12 files changed:
arch/powerpc/kernel/iommu.c
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_nested.c
arch/powerpc/kvm/book3s_hv_p9_entry.c
arch/powerpc/kvm/book3s_hv_uvmem.c
arch/powerpc/kvm/book3s_xive.c
arch/powerpc/kvm/e500mc.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/mm/init_64.c
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/platforms/pseries/iommu.c

@@@ -27,6 -27,7 +27,6 @@@
  #include <linux/sched.h>
  #include <linux/debugfs.h>
  #include <asm/io.h>
 -#include <asm/prom.h>
  #include <asm/iommu.h>
  #include <asm/pci-bridge.h>
  #include <asm/machdep.h>
@@@ -1064,7 -1065,7 +1064,7 @@@ extern long iommu_tce_xchg_no_kill(stru
        long ret;
        unsigned long size = 0;
  
-       ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, false);
+       ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction);
        if (!ret && ((*direction == DMA_FROM_DEVICE) ||
                        (*direction == DMA_BIDIRECTIONAL)) &&
                        !mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
@@@ -1079,7 -1080,7 +1079,7 @@@ void iommu_tce_kill(struct iommu_table 
                unsigned long entry, unsigned long pages)
  {
        if (tbl->it_ops->tce_kill)
-               tbl->it_ops->tce_kill(tbl, entry, pages, false);
+               tbl->it_ops->tce_kill(tbl, entry, pages);
  }
  EXPORT_SYMBOL_GPL(iommu_tce_kill);
  
@@@ -58,7 -58,7 +58,7 @@@ struct kvm_resize_hpt 
        /* Possible values and their usage:
         *  <0     an error occurred during allocation,
         *  -EBUSY allocation is in the progress,
 -       *  0      allocation made successfuly.
 +       *  0      allocation made successfully.
         */
        int error;
  
@@@ -256,26 -256,34 +256,34 @@@ void kvmppc_map_vrma(struct kvm_vcpu *v
  
  int kvmppc_mmu_hv_init(void)
  {
-       unsigned long host_lpid, rsvd_lpid;
+       unsigned long nr_lpids;
  
        if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
                return -EINVAL;
  
-       host_lpid = 0;
-       if (cpu_has_feature(CPU_FTR_HVMODE))
-               host_lpid = mfspr(SPRN_LPID);
+       if (cpu_has_feature(CPU_FTR_HVMODE)) {
+               if (WARN_ON(mfspr(SPRN_LPID) != 0))
+                       return -EINVAL;
+               nr_lpids = 1UL << mmu_lpid_bits;
+       } else {
+               nr_lpids = 1UL << KVM_MAX_NESTED_GUESTS_SHIFT;
+       }
  
-       /* POWER8 and above have 12-bit LPIDs (10-bit in POWER7) */
-       if (cpu_has_feature(CPU_FTR_ARCH_207S))
-               rsvd_lpid = LPID_RSVD;
-       else
-               rsvd_lpid = LPID_RSVD_POWER7;
+       if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+               /* POWER7 has 10-bit LPIDs, POWER8 has 12-bit LPIDs */
+               if (cpu_has_feature(CPU_FTR_ARCH_207S))
+                       WARN_ON(nr_lpids != 1UL << 12);
+               else
+                       WARN_ON(nr_lpids != 1UL << 10);
  
-       kvmppc_init_lpid(rsvd_lpid + 1);
+               /*
+                * Reserve the last implemented LPID use in partition
+                * switching for POWER7 and POWER8.
+                */
+               nr_lpids -= 1;
+       }
  
-       kvmppc_claim_lpid(host_lpid);
-       /* rsvd_lpid is reserved for use in partition switching */
-       kvmppc_claim_lpid(rsvd_lpid);
+       kvmppc_init_lpid(nr_lpids);
  
        return 0;
  }
@@@ -879,7 -887,7 +887,7 @@@ static bool kvm_age_rmapp(struct kvm *k
        struct revmap_entry *rev = kvm->arch.hpt.rev;
        unsigned long head, i, j;
        __be64 *hptep;
-       int ret = 0;
+       bool ret = false;
        unsigned long *rmapp;
  
        rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
        lock_rmap(rmapp);
        if (*rmapp & KVMPPC_RMAP_REFERENCED) {
                *rmapp &= ~KVMPPC_RMAP_REFERENCED;
-               ret = 1;
+               ret = true;
        }
        if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
                unlock_rmap(rmapp);
                                rev[i].guest_rpte |= HPTE_R_R;
                                note_hpte_modification(kvm, &rev[i]);
                        }
-                       ret = 1;
+                       ret = true;
                }
                __unlock_hpte(hptep, be64_to_cpu(hptep[0]));
        } while ((i = j) != head);
@@@ -42,7 -42,6 +42,7 @@@
  #include <linux/module.h>
  #include <linux/compiler.h>
  #include <linux/of.h>
 +#include <linux/irqdomain.h>
  
  #include <asm/ftrace.h>
  #include <asm/reg.h>
@@@ -1327,6 -1326,12 +1327,12 @@@ static int kvmppc_hcall_impl_hv(unsigne
        case H_CONFER:
        case H_REGISTER_VPA:
        case H_SET_MODE:
+ #ifdef CONFIG_SPAPR_TCE_IOMMU
+       case H_GET_TCE:
+       case H_PUT_TCE:
+       case H_PUT_TCE_INDIRECT:
+       case H_STUFF_TCE:
+ #endif
        case H_LOGICAL_CI_LOAD:
        case H_LOGICAL_CI_STORE:
  #ifdef CONFIG_KVM_XICS
@@@ -2835,7 -2840,7 +2841,7 @@@ static int kvmppc_core_vcpu_create_hv(s
         * to trap and then we emulate them.
         */
        vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB |
-               HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX;
+               HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP;
        if (cpu_has_feature(CPU_FTR_HVMODE)) {
                vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@@ -3968,6 -3973,7 +3974,7 @@@ static int kvmhv_vcpu_entry_p9_nested(s
  
        kvmhv_save_hv_regs(vcpu, &hvregs);
        hvregs.lpcr = lpcr;
+       hvregs.amor = ~0;
        vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
        hvregs.version = HV_GUEST_STATE_VERSION;
        if (vcpu->arch.nested) {
  static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
                         unsigned long lpcr, u64 *tb)
  {
+       struct kvm *kvm = vcpu->kvm;
+       struct kvm_nested_guest *nested = vcpu->arch.nested;
        u64 next_timer;
        int trap;
  
                trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb);
  
                /* H_CEDE has to be handled now, not later */
-               if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
+               if (trap == BOOK3S_INTERRUPT_SYSCALL && !nested &&
                    kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
                        kvmppc_cede(vcpu);
                        kvmppc_set_gpr(vcpu, 3, 0);
                        trap = 0;
                }
  
-       } else {
-               struct kvm *kvm = vcpu->kvm;
+       } else if (nested) {
+               __this_cpu_write(cpu_in_guest, kvm);
+               trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb);
+               __this_cpu_write(cpu_in_guest, NULL);
  
+       } else {
                kvmppc_xive_push_vcpu(vcpu);
  
                __this_cpu_write(cpu_in_guest, kvm);
                trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb);
                __this_cpu_write(cpu_in_guest, NULL);
  
-               if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
+               if (trap == BOOK3S_INTERRUPT_SYSCALL &&
                    !(vcpu->arch.shregs.msr & MSR_PR)) {
                        unsigned long req = kvmppc_get_gpr(vcpu, 3);
  
-                       /* H_CEDE has to be handled now, not later */
+                       /*
+                        * XIVE rearm and XICS hcalls must be handled
+                        * before xive context is pulled (is this
+                        * true?)
+                        */
                        if (req == H_CEDE) {
+                               /* H_CEDE has to be handled now */
                                kvmppc_cede(vcpu);
-                               kvmppc_xive_rearm_escalation(vcpu); /* may un-cede */
+                               if (!kvmppc_xive_rearm_escalation(vcpu)) {
+                                       /*
+                                        * Pending escalation so abort
+                                        * the cede.
+                                        */
+                                       vcpu->arch.ceded = 0;
+                               }
                                kvmppc_set_gpr(vcpu, 3, 0);
                                trap = 0;
  
-                       /* XICS hcalls must be handled before xive is pulled */
+                       } else if (req == H_ENTER_NESTED) {
+                               /*
+                                * L2 should not run with the L1
+                                * context so rearm and pull it.
+                                */
+                               if (!kvmppc_xive_rearm_escalation(vcpu)) {
+                                       /*
+                                        * Pending escalation so abort
+                                        * H_ENTER_NESTED.
+                                        */
+                                       kvmppc_set_gpr(vcpu, 3, 0);
+                                       trap = 0;
+                               }
                        } else if (hcall_is_xics(req)) {
                                int ret;
  
@@@ -4234,13 -4269,13 +4270,13 @@@ static void kvmppc_vcore_blocked(struc
        start_wait = ktime_get();
  
        vc->vcore_state = VCORE_SLEEPING;
-       trace_kvmppc_vcore_blocked(vc, 0);
+       trace_kvmppc_vcore_blocked(vc->runner, 0);
        spin_unlock(&vc->lock);
        schedule();
        finish_rcuwait(&vc->wait);
        spin_lock(&vc->lock);
        vc->vcore_state = VCORE_INACTIVE;
-       trace_kvmppc_vcore_blocked(vc, 1);
+       trace_kvmppc_vcore_blocked(vc->runner, 1);
        ++vc->runner->stat.halt_successful_wait;
  
        cur = ktime_get();
@@@ -4520,9 -4555,14 +4556,14 @@@ int kvmhv_run_single_vcpu(struct kvm_vc
  
        if (!nested) {
                kvmppc_core_prepare_to_enter(vcpu);
-               if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
-                            &vcpu->arch.pending_exceptions))
+               if (vcpu->arch.shregs.msr & MSR_EE) {
+                       if (xive_interrupt_pending(vcpu))
+                               kvmppc_inject_interrupt_hv(vcpu,
+                                               BOOK3S_INTERRUPT_EXTERNAL, 0);
+               } else if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
+                            &vcpu->arch.pending_exceptions)) {
                        lpcr |= LPCR_MER;
+               }
        } else if (vcpu->arch.pending_exceptions ||
                   vcpu->arch.doorbell_request ||
                   xive_interrupt_pending(vcpu)) {
                        if (kvmppc_vcpu_check_block(vcpu))
                                break;
  
-                       trace_kvmppc_vcore_blocked(vc, 0);
+                       trace_kvmppc_vcore_blocked(vcpu, 0);
                        schedule();
-                       trace_kvmppc_vcore_blocked(vc, 1);
+                       trace_kvmppc_vcore_blocked(vcpu, 1);
                }
                finish_rcuwait(wait);
        }
@@@ -5284,6 -5324,10 +5325,10 @@@ static int kvmppc_core_init_vm_hv(struc
                kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
                lpcr &= LPCR_PECE | LPCR_LPES;
        } else {
+               /*
+                * The L2 LPES mode will be set by the L0 according to whether
+                * or not it needs to take external interrupts in HV mode.
+                */
                lpcr = 0;
        }
        lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
@@@ -261,8 -261,7 +261,7 @@@ static void load_l2_hv_regs(struct kvm_
        /*
         * Don't let L1 change LPCR bits for the L2 except these:
         */
-       mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
-               LPCR_LPES | LPCR_MER;
+       mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | LPCR_MER;
  
        /*
         * Additional filtering is required depending on hardware
@@@ -306,10 -305,10 +305,10 @@@ long kvmhv_enter_nested_guest(struct kv
        /* copy parameters in */
        hv_ptr = kvmppc_get_gpr(vcpu, 4);
        regs_ptr = kvmppc_get_gpr(vcpu, 5);
 -      vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 +      kvm_vcpu_srcu_read_lock(vcpu);
        err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
                                              hv_ptr, regs_ptr);
 -      srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 +      kvm_vcpu_srcu_read_unlock(vcpu);
        if (err)
                return H_PARAMETER;
  
                byteswap_hv_regs(&l2_hv);
                byteswap_pt_regs(&l2_regs);
        }
 -      vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 +      kvm_vcpu_srcu_read_lock(vcpu);
        err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
                                               hv_ptr, regs_ptr);
 -      srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 +      kvm_vcpu_srcu_read_unlock(vcpu);
        if (err)
                return H_AUTHORITY;
  
@@@ -439,10 -438,11 +438,11 @@@ long kvmhv_nested_init(void
        if (!radix_enabled())
                return -ENODEV;
  
-       /* find log base 2 of KVMPPC_NR_LPIDS, rounding up */
-       ptb_order = __ilog2(KVMPPC_NR_LPIDS - 1) + 1;
-       if (ptb_order < 8)
-               ptb_order = 8;
+       /* Partition table entry is 1<<4 bytes in size, hence the 4. */
+       ptb_order = KVM_MAX_NESTED_GUESTS_SHIFT + 4;
+       /* Minimum partition table size is 1<<12 bytes */
+       if (ptb_order < 12)
+               ptb_order = 12;
        pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order,
                                       GFP_KERNEL);
        if (!pseries_partition_tb) {
                return -ENOMEM;
        }
  
-       ptcr = __pa(pseries_partition_tb) | (ptb_order - 8);
+       ptcr = __pa(pseries_partition_tb) | (ptb_order - 12);
        rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
        if (rc != H_SUCCESS) {
                pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
@@@ -521,11 -521,6 +521,6 @@@ static void kvmhv_set_nested_ptbl(struc
        kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table);
  }
  
- void kvmhv_vm_nested_init(struct kvm *kvm)
- {
-       kvm->arch.max_nested_lpid = -1;
- }
  /*
   * Handle the H_SET_PARTITION_TABLE hcall.
   * r4 = guest real address of partition table + log_2(size) - 12
@@@ -539,16 -534,14 +534,14 @@@ long kvmhv_set_partition_table(struct k
        long ret = H_SUCCESS;
  
        srcu_idx = srcu_read_lock(&kvm->srcu);
-       /*
-        * Limit the partition table to 4096 entries (because that's what
-        * hardware supports), and check the base address.
-        */
-       if ((ptcr & PRTS_MASK) > 12 - 8 ||
+       /* Check partition size and base address. */
+       if ((ptcr & PRTS_MASK) + 12 - 4 > KVM_MAX_NESTED_GUESTS_SHIFT ||
            !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT))
                ret = H_PARAMETER;
        srcu_read_unlock(&kvm->srcu, srcu_idx);
        if (ret == H_SUCCESS)
                kvm->arch.l1_ptcr = ptcr;
        return ret;
  }
  
@@@ -600,16 -593,16 +593,16 @@@ long kvmhv_copy_tofrom_guest_nested(str
                        goto not_found;
  
                /* Write what was loaded into our buffer back to the L1 guest */
 -              vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 +              kvm_vcpu_srcu_read_lock(vcpu);
                rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
 -              srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 +              kvm_vcpu_srcu_read_unlock(vcpu);
                if (rc)
                        goto not_found;
        } else {
                /* Load the data to be stored from the L1 guest into our buf */
 -              vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 +              kvm_vcpu_srcu_read_lock(vcpu);
                rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
 -              srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 +              kvm_vcpu_srcu_read_unlock(vcpu);
                if (rc)
                        goto not_found;
  
@@@ -644,7 -637,7 +637,7 @@@ static void kvmhv_update_ptbl_cache(str
  
        ret = -EFAULT;
        ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
-       if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) {
+       if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4))) {
                int srcu_idx = srcu_read_lock(&kvm->srcu);
                ret = kvm_read_guest(kvm, ptbl_addr,
                                     &ptbl_entry, sizeof(ptbl_entry));
        kvmhv_set_nested_ptbl(gp);
  }
  
+ void kvmhv_vm_nested_init(struct kvm *kvm)
+ {
+       idr_init(&kvm->arch.kvm_nested_guest_idr);
+ }
+ static struct kvm_nested_guest *__find_nested(struct kvm *kvm, int lpid)
+ {
+       return idr_find(&kvm->arch.kvm_nested_guest_idr, lpid);
+ }
+ static bool __prealloc_nested(struct kvm *kvm, int lpid)
+ {
+       if (idr_alloc(&kvm->arch.kvm_nested_guest_idr,
+                               NULL, lpid, lpid + 1, GFP_KERNEL) != lpid)
+               return false;
+       return true;
+ }
+ static void __add_nested(struct kvm *kvm, int lpid, struct kvm_nested_guest *gp)
+ {
+       if (idr_replace(&kvm->arch.kvm_nested_guest_idr, gp, lpid))
+               WARN_ON(1);
+ }
+ static void __remove_nested(struct kvm *kvm, int lpid)
+ {
+       idr_remove(&kvm->arch.kvm_nested_guest_idr, lpid);
+ }
  static struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
  {
        struct kvm_nested_guest *gp;
@@@ -720,13 -742,8 +742,8 @@@ static void kvmhv_remove_nested(struct 
        long ref;
  
        spin_lock(&kvm->mmu_lock);
-       if (gp == kvm->arch.nested_guests[lpid]) {
-               kvm->arch.nested_guests[lpid] = NULL;
-               if (lpid == kvm->arch.max_nested_lpid) {
-                       while (--lpid >= 0 && !kvm->arch.nested_guests[lpid])
-                               ;
-                       kvm->arch.max_nested_lpid = lpid;
-               }
+       if (gp == __find_nested(kvm, lpid)) {
+               __remove_nested(kvm, lpid);
                --gp->refcnt;
        }
        ref = gp->refcnt;
   */
  void kvmhv_release_all_nested(struct kvm *kvm)
  {
-       int i;
+       int lpid;
        struct kvm_nested_guest *gp;
        struct kvm_nested_guest *freelist = NULL;
        struct kvm_memory_slot *memslot;
        int srcu_idx, bkt;
  
        spin_lock(&kvm->mmu_lock);
-       for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
-               gp = kvm->arch.nested_guests[i];
-               if (!gp)
-                       continue;
-               kvm->arch.nested_guests[i] = NULL;
+       idr_for_each_entry(&kvm->arch.kvm_nested_guest_idr, gp, lpid) {
+               __remove_nested(kvm, lpid);
                if (--gp->refcnt == 0) {
                        gp->next = freelist;
                        freelist = gp;
                }
        }
-       kvm->arch.max_nested_lpid = -1;
+       idr_destroy(&kvm->arch.kvm_nested_guest_idr);
+       /* idr is empty and may be reused at this point */
        spin_unlock(&kvm->mmu_lock);
        while ((gp = freelist) != NULL) {
                freelist = gp->next;
@@@ -792,12 -807,11 +807,11 @@@ struct kvm_nested_guest *kvmhv_get_nest
  {
        struct kvm_nested_guest *gp, *newgp;
  
-       if (l1_lpid >= KVM_MAX_NESTED_GUESTS ||
-           l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
+       if (l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
                return NULL;
  
        spin_lock(&kvm->mmu_lock);
-       gp = kvm->arch.nested_guests[l1_lpid];
+       gp = __find_nested(kvm, l1_lpid);
        if (gp)
                ++gp->refcnt;
        spin_unlock(&kvm->mmu_lock);
        newgp = kvmhv_alloc_nested(kvm, l1_lpid);
        if (!newgp)
                return NULL;
+       if (!__prealloc_nested(kvm, l1_lpid)) {
+               kvmhv_release_nested(newgp);
+               return NULL;
+       }
        spin_lock(&kvm->mmu_lock);
-       if (kvm->arch.nested_guests[l1_lpid]) {
-               /* someone else beat us to it */
-               gp = kvm->arch.nested_guests[l1_lpid];
-       } else {
-               kvm->arch.nested_guests[l1_lpid] = newgp;
+       gp = __find_nested(kvm, l1_lpid);
+       if (!gp) {
+               __add_nested(kvm, l1_lpid, newgp);
                ++newgp->refcnt;
                gp = newgp;
                newgp = NULL;
-               if (l1_lpid > kvm->arch.max_nested_lpid)
-                       kvm->arch.max_nested_lpid = l1_lpid;
        }
        ++gp->refcnt;
        spin_unlock(&kvm->mmu_lock);
@@@ -841,20 -857,13 +857,13 @@@ void kvmhv_put_nested(struct kvm_nested
                kvmhv_release_nested(gp);
  }
  
- static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid)
- {
-       if (lpid > kvm->arch.max_nested_lpid)
-               return NULL;
-       return kvm->arch.nested_guests[lpid];
- }
  pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid,
                                 unsigned long ea, unsigned *hshift)
  {
        struct kvm_nested_guest *gp;
        pte_t *pte;
  
-       gp = kvmhv_find_nested(kvm, lpid);
+       gp = __find_nested(kvm, lpid);
        if (!gp)
                return NULL;
  
@@@ -960,7 -969,7 +969,7 @@@ static void kvmhv_remove_nest_rmap(stru
  
        gpa = n_rmap & RMAP_NESTED_GPA_MASK;
        lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
-       gp = kvmhv_find_nested(kvm, lpid);
+       gp = __find_nested(kvm, lpid);
        if (!gp)
                return;
  
@@@ -1152,16 -1161,13 +1161,13 @@@ static void kvmhv_emulate_tlbie_all_lpi
  {
        struct kvm *kvm = vcpu->kvm;
        struct kvm_nested_guest *gp;
-       int i;
+       int lpid;
  
        spin_lock(&kvm->mmu_lock);
-       for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
-               gp = kvm->arch.nested_guests[i];
-               if (gp) {
-                       spin_unlock(&kvm->mmu_lock);
-                       kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
-                       spin_lock(&kvm->mmu_lock);
-               }
+       idr_for_each_entry(&kvm->arch.kvm_nested_guest_idr, gp, lpid) {
+               spin_unlock(&kvm->mmu_lock);
+               kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+               spin_lock(&kvm->mmu_lock);
        }
        spin_unlock(&kvm->mmu_lock);
  }
@@@ -1313,7 -1319,7 +1319,7 @@@ long do_h_rpt_invalidate_pat(struct kvm
         * H_ENTER_NESTED call. Since we can't differentiate this case from
         * the invalid case, we ignore such flush requests and return success.
         */
-       if (!kvmhv_find_nested(vcpu->kvm, lpid))
+       if (!__find_nested(vcpu->kvm, lpid))
                return H_SUCCESS;
  
        /*
@@@ -1657,15 -1663,12 +1663,12 @@@ long int kvmhv_nested_page_fault(struc
  
  int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid)
  {
-       int ret = -1;
+       int ret = lpid + 1;
  
        spin_lock(&kvm->mmu_lock);
-       while (++lpid <= kvm->arch.max_nested_lpid) {
-               if (kvm->arch.nested_guests[lpid]) {
-                       ret = lpid;
-                       break;
-               }
-       }
+       if (!idr_get_next(&kvm->arch.kvm_nested_guest_idr, &ret))
+               ret = -1;
        spin_unlock(&kvm->mmu_lock);
        return ret;
  }
@@@ -379,7 -379,7 +379,7 @@@ void restore_p9_host_os_sprs(struct kvm
  {
        /*
         * current->thread.xxx registers must all be restored to host
 -       * values before a potential context switch, othrewise the context
 +       * values before a potential context switch, otherwise the context
         * switch itself will overwrite current->thread.xxx with the values
         * from the guest SPRs.
         */
@@@ -539,8 -539,10 +539,10 @@@ static void switch_mmu_to_guest_radix(s
  {
        struct kvm_nested_guest *nested = vcpu->arch.nested;
        u32 lpid;
+       u32 pid;
  
        lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
+       pid = vcpu->arch.pid;
  
        /*
         * Prior memory accesses to host PID Q3 must be completed before we
        isync();
        mtspr(SPRN_LPID, lpid);
        mtspr(SPRN_LPCR, lpcr);
-       mtspr(SPRN_PID, vcpu->arch.pid);
+       mtspr(SPRN_PID, pid);
        /*
         * isync not required here because we are HRFID'ing to guest before
         * any guest context access, which is context synchronising.
  static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
  {
        u32 lpid;
+       u32 pid;
        int i;
  
        lpid = kvm->arch.lpid;
+       pid = vcpu->arch.pid;
  
        /*
         * See switch_mmu_to_guest_radix. ptesync should not be required here
        isync();
        mtspr(SPRN_LPID, lpid);
        mtspr(SPRN_LPCR, lpcr);
-       mtspr(SPRN_PID, vcpu->arch.pid);
+       mtspr(SPRN_PID, pid);
  
        for (i = 0; i < vcpu->arch.slb_max; i++)
                mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);
  
  static void switch_mmu_to_host(struct kvm *kvm, u32 pid)
  {
+       u32 lpid = kvm->arch.host_lpid;
+       u64 lpcr = kvm->arch.host_lpcr;
        /*
         * The guest has exited, so guest MMU context is no longer being
         * non-speculatively accessed, but a hwsync is needed before the
        asm volatile("hwsync" ::: "memory");
        isync();
        mtspr(SPRN_PID, pid);
-       mtspr(SPRN_LPID, kvm->arch.host_lpid);
-       mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
+       mtspr(SPRN_LPID, lpid);
+       mtspr(SPRN_LPCR, lpcr);
        /*
         * isync is not required after the switch, because mtmsrd with L=0
         * is performed after this switch, which is context synchronising.
@@@ -120,7 -120,7 +120,7 @@@ static DEFINE_SPINLOCK(kvmppc_uvmem_bit
   *    content is un-encrypted.
   *
   * (c) Normal - The GFN is a normal. The GFN is associated with
 - *    a normal VM. The contents of the GFN is accesible to
 + *    a normal VM. The contents of the GFN is accessible to
   *    the Hypervisor. Its content is never encrypted.
   *
   * States of a VM.
@@@ -361,13 -361,15 +361,15 @@@ static bool kvmppc_gfn_is_uvmem_pfn(uns
  static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot,
                struct kvm *kvm, unsigned long *gfn)
  {
-       struct kvmppc_uvmem_slot *p;
+       struct kvmppc_uvmem_slot *p = NULL, *iter;
        bool ret = false;
        unsigned long i;
  
-       list_for_each_entry(p, &kvm->arch.uvmem_pfns, list)
-               if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns)
+       list_for_each_entry(iter, &kvm->arch.uvmem_pfns, list)
+               if (*gfn >= iter->base_pfn && *gfn < iter->base_pfn + iter->nr_pfns) {
+                       p = iter;
                        break;
+               }
        if (!p)
                return ret;
        /*
  
  #include "book3s_xive.h"
  
- /*
-  * Virtual mode variants of the hcalls for use on radix/radix
-  * with AIL. They require the VCPU's VP to be "pushed"
-  *
-  * We still instantiate them here because we use some of the
-  * generated utility functions as well in this file.
-  */
- #define XIVE_RUNTIME_CHECKS
- #define X_PFX xive_vm_
- #define X_STATIC static
- #define X_STAT_PFX stat_vm_
- #define __x_tima              xive_tima
  #define __x_eoi_page(xd)      ((void __iomem *)((xd)->eoi_mmio))
  #define __x_trig_page(xd)     ((void __iomem *)((xd)->trig_mmio))
- #define __x_writeb    __raw_writeb
- #define __x_readw     __raw_readw
- #define __x_readq     __raw_readq
- #define __x_writeq    __raw_writeq
  
- #include "book3s_xive_template.c"
+ /* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
+ #define XICS_DUMMY    1
+ static void xive_vm_ack_pending(struct kvmppc_xive_vcpu *xc)
+ {
+       u8 cppr;
+       u16 ack;
+       /*
+        * Ensure any previous store to CPPR is ordered vs.
+        * the subsequent loads from PIPR or ACK.
+        */
+       eieio();
+       /* Perform the acknowledge OS to register cycle. */
+       ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG));
+       /* Synchronize subsequent queue accesses */
+       mb();
+       /* XXX Check grouping level */
+       /* Anything ? */
+       if (!((ack >> 8) & TM_QW1_NSR_EO))
+               return;
+       /* Grab CPPR of the most favored pending interrupt */
+       cppr = ack & 0xff;
+       if (cppr < 8)
+               xc->pending |= 1 << cppr;
+       /* Check consistency */
+       if (cppr >= xc->hw_cppr)
+               pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
+                       smp_processor_id(), cppr, xc->hw_cppr);
+       /*
+        * Update our image of the HW CPPR. We don't yet modify
+        * xc->cppr, this will be done as we scan for interrupts
+        * in the queues.
+        */
+       xc->hw_cppr = cppr;
+ }
+ static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
+ {
+       u64 val;
+       if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
+               offset |= XIVE_ESB_LD_ST_MO;
+       val = __raw_readq(__x_eoi_page(xd) + offset);
+ #ifdef __LITTLE_ENDIAN__
+       val >>= 64-8;
+ #endif
+       return (u8)val;
+ }
+ static void xive_vm_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
+ {
+       /* If the XIVE supports the new "store EOI facility, use it */
+       if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
+               __raw_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
+       else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
+               /*
+                * For LSIs the HW EOI cycle is used rather than PQ bits,
+                * as they are automatically re-triggred in HW when still
+                * pending.
+                */
+               __raw_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
+       } else {
+               uint64_t eoi_val;
+               /*
+                * Otherwise for EOI, we use the special MMIO that does
+                * a clear of both P and Q and returns the old Q,
+                * except for LSIs where we use the "EOI cycle" special
+                * load.
+                *
+                * This allows us to then do a re-trigger if Q was set
+                * rather than synthetizing an interrupt in software
+                */
+               eoi_val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_00);
+               /* Re-trigger if needed */
+               if ((eoi_val & 1) && __x_trig_page(xd))
+                       __raw_writeq(0, __x_trig_page(xd));
+       }
+ }
+ enum {
+       scan_fetch,
+       scan_poll,
+       scan_eoi,
+ };
+ static u32 xive_vm_scan_interrupts(struct kvmppc_xive_vcpu *xc,
+                                      u8 pending, int scan_type)
+ {
+       u32 hirq = 0;
+       u8 prio = 0xff;
+       /* Find highest pending priority */
+       while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
+               struct xive_q *q;
+               u32 idx, toggle;
+               __be32 *qpage;
+               /*
+                * If pending is 0 this will return 0xff which is what
+                * we want
+                */
+               prio = ffs(pending) - 1;
+               /* Don't scan past the guest cppr */
+               if (prio >= xc->cppr || prio > 7) {
+                       if (xc->mfrr < xc->cppr) {
+                               prio = xc->mfrr;
+                               hirq = XICS_IPI;
+                       }
+                       break;
+               }
+               /* Grab queue and pointers */
+               q = &xc->queues[prio];
+               idx = q->idx;
+               toggle = q->toggle;
+               /*
+                * Snapshot the queue page. The test further down for EOI
+                * must use the same "copy" that was used by __xive_read_eq
+                * since qpage can be set concurrently and we don't want
+                * to miss an EOI.
+                */
+               qpage = READ_ONCE(q->qpage);
+ skip_ipi:
+               /*
+                * Try to fetch from the queue. Will return 0 for a
+                * non-queueing priority (ie, qpage = 0).
+                */
+               hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
+               /*
+                * If this was a signal for an MFFR change done by
+                * H_IPI we skip it. Additionally, if we were fetching
+                * we EOI it now, thus re-enabling reception of a new
+                * such signal.
+                *
+                * We also need to do that if prio is 0 and we had no
+                * page for the queue. In this case, we have non-queued
+                * IPI that needs to be EOId.
+                *
+                * This is safe because if we have another pending MFRR
+                * change that wasn't observed above, the Q bit will have
+                * been set and another occurrence of the IPI will trigger.
+                */
+               if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
+                       if (scan_type == scan_fetch) {
+                               xive_vm_source_eoi(xc->vp_ipi,
+                                                      &xc->vp_ipi_data);
+                               q->idx = idx;
+                               q->toggle = toggle;
+                       }
+                       /* Loop back on same queue with updated idx/toggle */
+                       WARN_ON(hirq && hirq != XICS_IPI);
+                       if (hirq)
+                               goto skip_ipi;
+               }
+               /* If it's the dummy interrupt, continue searching */
+               if (hirq == XICS_DUMMY)
+                       goto skip_ipi;
+               /* Clear the pending bit if the queue is now empty */
+               if (!hirq) {
+                       pending &= ~(1 << prio);
+                       /*
+                        * Check if the queue count needs adjusting due to
+                        * interrupts being moved away.
+                        */
+                       if (atomic_read(&q->pending_count)) {
+                               int p = atomic_xchg(&q->pending_count, 0);
+                               if (p) {
+                                       WARN_ON(p > atomic_read(&q->count));
+                                       atomic_sub(p, &q->count);
+                               }
+                       }
+               }
+               /*
+                * If the most favoured prio we found pending is less
+                * favored (or equal) than a pending IPI, we return
+                * the IPI instead.
+                */
+               if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
+                       prio = xc->mfrr;
+                       hirq = XICS_IPI;
+                       break;
+               }
+               /* If fetching, update queue pointers */
+               if (scan_type == scan_fetch) {
+                       q->idx = idx;
+                       q->toggle = toggle;
+               }
+       }
+       /* If we are just taking a "peek", do nothing else */
+       if (scan_type == scan_poll)
+               return hirq;
+       /* Update the pending bits */
+       xc->pending = pending;
+       /*
+        * If this is an EOI that's it, no CPPR adjustment done here,
+        * all we needed was cleanup the stale pending bits and check
+        * if there's anything left.
+        */
+       if (scan_type == scan_eoi)
+               return hirq;
+       /*
+        * If we found an interrupt, adjust what the guest CPPR should
+        * be as if we had just fetched that interrupt from HW.
+        *
+        * Note: This can only make xc->cppr smaller as the previous
+        * loop will only exit with hirq != 0 if prio is lower than
+        * the current xc->cppr. Thus we don't need to re-check xc->mfrr
+        * for pending IPIs.
+        */
+       if (hirq)
+               xc->cppr = prio;
+       /*
+        * If it was an IPI the HW CPPR might have been lowered too much
+        * as the HW interrupt we use for IPIs is routed to priority 0.
+        *
+        * We re-sync it here.
+        */
+       if (xc->cppr != xc->hw_cppr) {
+               xc->hw_cppr = xc->cppr;
+               __raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
+       }
+       return hirq;
+ }
+ static unsigned long xive_vm_h_xirr(struct kvm_vcpu *vcpu)
+ {
+       struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+       u8 old_cppr;
+       u32 hirq;
+       pr_devel("H_XIRR\n");
+       xc->stat_vm_h_xirr++;
+       /* First collect pending bits from HW */
+       xive_vm_ack_pending(xc);
+       pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
+                xc->pending, xc->hw_cppr, xc->cppr);
+       /* Grab previous CPPR and reverse map it */
+       old_cppr = xive_prio_to_guest(xc->cppr);
+       /* Scan for actual interrupts */
+       hirq = xive_vm_scan_interrupts(xc, xc->pending, scan_fetch);
+       pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
+                hirq, xc->hw_cppr, xc->cppr);
+       /* That should never hit */
+       if (hirq & 0xff000000)
+               pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
+       /*
+        * XXX We could check if the interrupt is masked here and
+        * filter it. If we chose to do so, we would need to do:
+        *
+        *    if (masked) {
+        *        lock();
+        *        if (masked) {
+        *            old_Q = true;
+        *            hirq = 0;
+        *        }
+        *        unlock();
+        *    }
+        */
+       /* Return interrupt and old CPPR in GPR4 */
+       vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24);
+       return H_SUCCESS;
+ }
+ static unsigned long xive_vm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
+ {
+       struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+       u8 pending = xc->pending;
+       u32 hirq;
+       pr_devel("H_IPOLL(server=%ld)\n", server);
+       xc->stat_vm_h_ipoll++;
+       /* Grab the target VCPU if not the current one */
+       if (xc->server_num != server) {
+               vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
+               if (!vcpu)
+                       return H_PARAMETER;
+               xc = vcpu->arch.xive_vcpu;
+               /* Scan all priorities */
+               pending = 0xff;
+       } else {
+               /* Grab pending interrupt if any */
+               __be64 qw1 = __raw_readq(xive_tima + TM_QW1_OS);
+               u8 pipr = be64_to_cpu(qw1) & 0xff;
+               if (pipr < 8)
+                       pending |= 1 << pipr;
+       }
+       hirq = xive_vm_scan_interrupts(xc, pending, scan_poll);
+       /* Return interrupt and old CPPR in GPR4 */
+       vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24);
+       return H_SUCCESS;
+ }
+ static void xive_vm_push_pending_to_hw(struct kvmppc_xive_vcpu *xc)
+ {
+       u8 pending, prio;
+       pending = xc->pending;
+       if (xc->mfrr != 0xff) {
+               if (xc->mfrr < 8)
+                       pending |= 1 << xc->mfrr;
+               else
+                       pending |= 0x80;
+       }
+       if (!pending)
+               return;
+       prio = ffs(pending) - 1;
+       __raw_writeb(prio, xive_tima + TM_SPC_SET_OS_PENDING);
+ }
+ static void xive_vm_scan_for_rerouted_irqs(struct kvmppc_xive *xive,
+                                              struct kvmppc_xive_vcpu *xc)
+ {
+       unsigned int prio;
+       /* For each priority that is now masked */
+       for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+               struct xive_q *q = &xc->queues[prio];
+               struct kvmppc_xive_irq_state *state;
+               struct kvmppc_xive_src_block *sb;
+               u32 idx, toggle, entry, irq, hw_num;
+               struct xive_irq_data *xd;
+               __be32 *qpage;
+               u16 src;
+               idx = q->idx;
+               toggle = q->toggle;
+               qpage = READ_ONCE(q->qpage);
+               if (!qpage)
+                       continue;
+               /* For each interrupt in the queue */
+               for (;;) {
+                       entry = be32_to_cpup(qpage + idx);
+                       /* No more ? */
+                       if ((entry >> 31) == toggle)
+                               break;
+                       irq = entry & 0x7fffffff;
+                       /* Skip dummies and IPIs */
+                       if (irq == XICS_DUMMY || irq == XICS_IPI)
+                               goto next;
+                       sb = kvmppc_xive_find_source(xive, irq, &src);
+                       if (!sb)
+                               goto next;
+                       state = &sb->irq_state[src];
+                       /* Has it been rerouted ? */
+                       if (xc->server_num == state->act_server)
+                               goto next;
+                       /*
+                        * Allright, it *has* been re-routed, kill it from
+                        * the queue.
+                        */
+                       qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
+                       /* Find the HW interrupt */
+                       kvmppc_xive_select_irq(state, &hw_num, &xd);
+                       /* If it's not an LSI, set PQ to 11 the EOI will force a resend */
+                       if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
+                               xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
+                       /* EOI the source */
+                       xive_vm_source_eoi(hw_num, xd);
+ next:
+                       idx = (idx + 1) & q->msk;
+                       if (idx == 0)
+                               toggle ^= 1;
+               }
+       }
+ }
+ static int xive_vm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+ {
+       struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+       struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+       u8 old_cppr;
+       pr_devel("H_CPPR(cppr=%ld)\n", cppr);
+       xc->stat_vm_h_cppr++;
+       /* Map CPPR */
+       cppr = xive_prio_from_guest(cppr);
+       /* Remember old and update SW state */
+       old_cppr = xc->cppr;
+       xc->cppr = cppr;
+       /*
+        * Order the above update of xc->cppr with the subsequent
+        * read of xc->mfrr inside push_pending_to_hw()
+        */
+       smp_mb();
+       if (cppr > old_cppr) {
+               /*
+                * We are masking less, we need to look for pending things
+                * to deliver and set VP pending bits accordingly to trigger
+                * a new interrupt otherwise we might miss MFRR changes for
+                * which we have optimized out sending an IPI signal.
+                */
+               xive_vm_push_pending_to_hw(xc);
+       } else {
+               /*
+                * We are masking more, we need to check the queue for any
+                * interrupt that has been routed to another CPU, take
+                * it out (replace it with the dummy) and retrigger it.
+                *
+                * This is necessary since those interrupts may otherwise
+                * never be processed, at least not until this CPU restores
+                * its CPPR.
+                *
+                * This is in theory racy vs. HW adding new interrupts to
+                * the queue. In practice this works because the interesting
+                * cases are when the guest has done a set_xive() to move the
+                * interrupt away, which flushes the xive, followed by the
+                * target CPU doing a H_CPPR. So any new interrupt coming into
+                * the queue must still be routed to us and isn't a source
+                * of concern.
+                */
+               xive_vm_scan_for_rerouted_irqs(xive, xc);
+       }
+       /* Apply new CPPR */
+       xc->hw_cppr = cppr;
+       __raw_writeb(cppr, xive_tima + TM_QW1_OS + TM_CPPR);
+       return H_SUCCESS;
+ }
+ static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+ {
+       struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+       struct kvmppc_xive_src_block *sb;
+       struct kvmppc_xive_irq_state *state;
+       struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+       struct xive_irq_data *xd;
+       u8 new_cppr = xirr >> 24;
+       u32 irq = xirr & 0x00ffffff, hw_num;
+       u16 src;
+       int rc = 0;
+       pr_devel("H_EOI(xirr=%08lx)\n", xirr);
+       xc->stat_vm_h_eoi++;
+       xc->cppr = xive_prio_from_guest(new_cppr);
+       /*
+        * IPIs are synthetized from MFRR and thus don't need
+        * any special EOI handling. The underlying interrupt
+        * used to signal MFRR changes is EOId when fetched from
+        * the queue.
+        */
+       if (irq == XICS_IPI || irq == 0) {
+               /*
+                * This barrier orders the setting of xc->cppr vs.
+                * subsquent test of xc->mfrr done inside
+                * scan_interrupts and push_pending_to_hw
+                */
+               smp_mb();
+               goto bail;
+       }
+       /* Find interrupt source */
+       sb = kvmppc_xive_find_source(xive, irq, &src);
+       if (!sb) {
+               pr_devel(" source not found !\n");
+               rc = H_PARAMETER;
+               /* Same as above */
+               smp_mb();
+               goto bail;
+       }
+       state = &sb->irq_state[src];
+       kvmppc_xive_select_irq(state, &hw_num, &xd);
+       state->in_eoi = true;
+       /*
+        * This barrier orders both setting of in_eoi above vs,
+        * subsequent test of guest_priority, and the setting
+        * of xc->cppr vs. subsquent test of xc->mfrr done inside
+        * scan_interrupts and push_pending_to_hw
+        */
+       smp_mb();
+ again:
+       if (state->guest_priority == MASKED) {
+               arch_spin_lock(&sb->lock);
+               if (state->guest_priority != MASKED) {
+                       arch_spin_unlock(&sb->lock);
+                       goto again;
+               }
+               pr_devel(" EOI on saved P...\n");
+               /* Clear old_p, that will cause unmask to perform an EOI */
+               state->old_p = false;
+               arch_spin_unlock(&sb->lock);
+       } else {
+               pr_devel(" EOI on source...\n");
+               /* Perform EOI on the source */
+               xive_vm_source_eoi(hw_num, xd);
+               /* If it's an emulated LSI, check level and resend */
+               if (state->lsi && state->asserted)
+                       __raw_writeq(0, __x_trig_page(xd));
+       }
+       /*
+        * This barrier orders the above guest_priority check
+        * and spin_lock/unlock with clearing in_eoi below.
+        *
+        * It also has to be a full mb() as it must ensure
+        * the MMIOs done in source_eoi() are completed before
+        * state->in_eoi is visible.
+        */
+       mb();
+       state->in_eoi = false;
+ bail:
+       /* Re-evaluate pending IRQs and update HW */
+       xive_vm_scan_interrupts(xc, xc->pending, scan_eoi);
+       xive_vm_push_pending_to_hw(xc);
+       pr_devel(" after scan pending=%02x\n", xc->pending);
+       /* Apply new CPPR */
+       xc->hw_cppr = xc->cppr;
+       __raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
+       return rc;
+ }
+ static int xive_vm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+                              unsigned long mfrr)
+ {
+       struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+       pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
+       xc->stat_vm_h_ipi++;
+       /* Find target */
+       vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
+       if (!vcpu)
+               return H_PARAMETER;
+       xc = vcpu->arch.xive_vcpu;
+       /* Locklessly write over MFRR */
+       xc->mfrr = mfrr;
+       /*
+        * The load of xc->cppr below and the subsequent MMIO store
+        * to the IPI must happen after the above mfrr update is
+        * globally visible so that:
+        *
+        * - Synchronize with another CPU doing an H_EOI or a H_CPPR
+        *   updating xc->cppr then reading xc->mfrr.
+        *
+        * - The target of the IPI sees the xc->mfrr update
+        */
+       mb();
+       /* Shoot the IPI if most favored than target cppr */
+       if (mfrr < xc->cppr)
+               __raw_writeq(0, __x_trig_page(&xc->vp_ipi_data));
+       return H_SUCCESS;
+ }
  
  /*
   * We leave a gap of a couple of interrupts in the queue to
@@@ -124,7 -726,7 +726,7 @@@ void kvmppc_xive_push_vcpu(struct kvm_v
                 * interrupt might have fired and be on its way to the
                 * host queue while we mask it, and if we unmask it
                 * early enough (re-cede right away), there is a
 -               * theorical possibility that it fires again, thus
 +               * theoretical possibility that it fires again, thus
                 * landing in the target queue more than once which is
                 * a big no-no.
                 *
@@@ -179,12 -781,13 +781,13 @@@ void kvmppc_xive_pull_vcpu(struct kvm_v
  }
  EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
  
- void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
+ bool kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
  {
        void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr;
+       bool ret = true;
  
        if (!esc_vaddr)
-               return;
+               return ret;
  
        /* we are using XIVE with single escalation */
  
                 * we also don't want to set xive_esc_on to 1 here in
                 * case we race with xive_esc_irq().
                 */
-               vcpu->arch.ceded = 0;
+               ret = false;
                /*
                 * The escalation interrupts are special as we don't EOI them.
                 * There is no need to use the load-after-store ordering offset
                __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00);
        }
        mb();
+       return ret;
  }
  EXPORT_SYMBOL_GPL(kvmppc_xive_rearm_escalation);
  
@@@ -238,7 -843,7 +843,7 @@@ static irqreturn_t xive_esc_irq(int irq
  
        vcpu->arch.irq_pending = 1;
        smp_mb();
-       if (vcpu->arch.ceded)
+       if (vcpu->arch.ceded || vcpu->arch.nested)
                kvmppc_fast_vcpu_kick(vcpu);
  
        /* Since we have the no-EOI flag, the interrupt is effectively
@@@ -622,7 -1227,7 +1227,7 @@@ static int xive_target_interrupt(struc
  
  /*
   * Targetting rules: In order to avoid losing track of
 - * pending interrupts accross mask and unmask, which would
 + * pending interrupts across mask and unmask, which would
   * allow queue overflows, we implement the following rules:
   *
   *  - Unless it was never enabled (or we run out of capacity)
@@@ -1073,7 -1678,7 +1678,7 @@@ int kvmppc_xive_clr_mapped(struct kvm *
        /*
         * If old_p is set, the interrupt is pending, we switch it to
         * PQ=11. This will force a resend in the host so the interrupt
 -       * isn't lost to whatver host driver may pick it up
 +       * isn't lost to whatever host driver may pick it up
         */
        if (state->old_p)
                xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_11);
@@@ -309,7 -309,7 +309,7 @@@ static int kvmppc_core_vcpu_create_e500
        BUILD_BUG_ON(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0);
        vcpu_e500 = to_e500(vcpu);
  
 -      /* Invalid PIR value -- this LPID dosn't have valid state on any cpu */
 +      /* Invalid PIR value -- this LPID doesn't have valid state on any cpu */
        vcpu->arch.oldpir = 0xffffffff;
  
        err = kvmppc_e500_tlb_init(vcpu_e500);
@@@ -399,7 -399,6 +399,6 @@@ static int __init kvmppc_e500mc_init(vo
         * allocator.
         */
        kvmppc_init_lpid(KVMPPC_NR_LPIDS/threads_per_core);
-       kvmppc_claim_lpid(0); /* host */
  
        r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
        if (r)
@@@ -19,7 -19,6 +19,7 @@@
  #include <linux/module.h>
  #include <linux/irqbypass.h>
  #include <linux/kvm_irqfd.h>
 +#include <linux/of.h>
  #include <asm/cputable.h>
  #include <linux/uaccess.h>
  #include <asm/kvm_ppc.h>
@@@ -426,9 -425,9 +426,9 @@@ int kvmppc_ld(struct kvm_vcpu *vcpu, ul
                return EMULATE_DONE;
        }
  
 -      vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 +      kvm_vcpu_srcu_read_lock(vcpu);
        rc = kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size);
 -      srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 +      kvm_vcpu_srcu_read_unlock(vcpu);
        if (rc)
                return EMULATE_DO_MMIO;
  
        return r;
  }
  
- static unsigned long lpid_inuse[BITS_TO_LONGS(KVMPPC_NR_LPIDS)];
+ static DEFINE_IDA(lpid_inuse);
  static unsigned long nr_lpids;
  
  long kvmppc_alloc_lpid(void)
  {
-       long lpid;
+       int lpid;
  
-       do {
-               lpid = find_first_zero_bit(lpid_inuse, KVMPPC_NR_LPIDS);
-               if (lpid >= nr_lpids) {
+       /* The host LPID must always be 0 (allocation starts at 1) */
+       lpid = ida_alloc_range(&lpid_inuse, 1, nr_lpids - 1, GFP_KERNEL);
+       if (lpid < 0) {
+               if (lpid == -ENOMEM)
+                       pr_err("%s: Out of memory\n", __func__);
+               else
                        pr_err("%s: No LPIDs free\n", __func__);
-                       return -ENOMEM;
-               }
-       } while (test_and_set_bit(lpid, lpid_inuse));
+               return -ENOMEM;
+       }
  
        return lpid;
  }
  EXPORT_SYMBOL_GPL(kvmppc_alloc_lpid);
  
- void kvmppc_claim_lpid(long lpid)
- {
-       set_bit(lpid, lpid_inuse);
- }
- EXPORT_SYMBOL_GPL(kvmppc_claim_lpid);
  void kvmppc_free_lpid(long lpid)
  {
-       clear_bit(lpid, lpid_inuse);
+       ida_free(&lpid_inuse, lpid);
  }
  EXPORT_SYMBOL_GPL(kvmppc_free_lpid);
  
+ /* nr_lpids_param includes the host LPID */
  void kvmppc_init_lpid(unsigned long nr_lpids_param)
  {
-       nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param);
-       memset(lpid_inuse, 0, sizeof(lpid_inuse));
+       nr_lpids = nr_lpids_param;
  }
  EXPORT_SYMBOL_GPL(kvmppc_init_lpid);
  
@@@ -111,7 -111,7 +111,7 @@@ static int __meminit vmemmap_populated(
  }
  
  /*
 - * vmemmap virtual address space management does not have a traditonal page
 + * vmemmap virtual address space management does not have a traditional page
   * table to track which virtual struct pages are backed by physical mapping.
   * The virtual to physical mappings are tracked in a simple linked list
   * format. 'vmemmap_list' maintains the entire vmemmap physical mapping at
@@@ -128,7 -128,7 +128,7 @@@ static struct vmemmap_backing *next
  
  /*
   * The same pointer 'next' tracks individual chunks inside the allocated
 - * full page during the boot time and again tracks the freeed nodes during
 + * full page during the boot time and again tracks the freed nodes during
   * runtime. It is racy but it does not happen as they are separated by the
   * boot process. Will create problem if some how we have memory hotplug
   * operation during boot !!
@@@ -372,6 -372,9 +372,9 @@@ void register_page_bootmem_memmap(unsig
  
  #ifdef CONFIG_PPC_BOOK3S_64
  unsigned int mmu_lpid_bits;
+ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ EXPORT_SYMBOL_GPL(mmu_lpid_bits);
+ #endif
  unsigned int mmu_pid_bits;
  
  static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
  #include <linux/rculist.h>
  #include <linux/sizes.h>
  #include <linux/debugfs.h>
 +#include <linux/of_address.h>
 +#include <linux/of_irq.h>
  
  #include <asm/sections.h>
  #include <asm/io.h>
 -#include <asm/prom.h>
  #include <asm/pci-bridge.h>
  #include <asm/machdep.h>
  #include <asm/msi_bitmap.h>
@@@ -1268,22 -1267,20 +1268,20 @@@ static bool pnv_pci_ioda_iommu_bypass_s
        return false;
  }
  
- static inline __be64 __iomem *pnv_ioda_get_inval_reg(struct pnv_phb *phb,
-                                                    bool real_mode)
+ static inline __be64 __iomem *pnv_ioda_get_inval_reg(struct pnv_phb *phb)
  {
-       return real_mode ? (__be64 __iomem *)(phb->regs_phys + 0x210) :
-               (phb->regs + 0x210);
+       return phb->regs + 0x210;
  }
  
  static void pnv_pci_p7ioc_tce_invalidate(struct iommu_table *tbl,
-               unsigned long index, unsigned long npages, bool rm)
+               unsigned long index, unsigned long npages)
  {
        struct iommu_table_group_link *tgl = list_first_entry_or_null(
                        &tbl->it_group_list, struct iommu_table_group_link,
                        next);
        struct pnv_ioda_pe *pe = container_of(tgl->table_group,
                        struct pnv_ioda_pe, table_group);
-       __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm);
+       __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb);
        unsigned long start, end, inc;
  
        start = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset);
  
          mb(); /* Ensure above stores are visible */
          while (start <= end) {
-               if (rm)
-                       __raw_rm_writeq_be(start, invalidate);
-               else
-                       __raw_writeq_be(start, invalidate);
+               __raw_writeq_be(start, invalidate);
                  start += inc;
          }
  
@@@ -1321,7 -1314,7 +1315,7 @@@ static int pnv_ioda1_tce_build(struct i
                        attrs);
  
        if (!ret)
-               pnv_pci_p7ioc_tce_invalidate(tbl, index, npages, false);
+               pnv_pci_p7ioc_tce_invalidate(tbl, index, npages);
  
        return ret;
  }
  #ifdef CONFIG_IOMMU_API
  /* Common for IODA1 and IODA2 */
  static int pnv_ioda_tce_xchg_no_kill(struct iommu_table *tbl, long index,
-               unsigned long *hpa, enum dma_data_direction *direction,
-               bool realmode)
+               unsigned long *hpa, enum dma_data_direction *direction)
  {
-       return pnv_tce_xchg(tbl, index, hpa, direction, !realmode);
+       return pnv_tce_xchg(tbl, index, hpa, direction);
  }
  #endif
  
@@@ -1341,7 -1333,7 +1334,7 @@@ static void pnv_ioda1_tce_free(struct i
  {
        pnv_tce_free(tbl, index, npages);
  
-       pnv_pci_p7ioc_tce_invalidate(tbl, index, npages, false);
+       pnv_pci_p7ioc_tce_invalidate(tbl, index, npages);
  }
  
  static struct iommu_table_ops pnv_ioda1_iommu_ops = {
  static inline void pnv_pci_phb3_tce_invalidate_pe(struct pnv_ioda_pe *pe)
  {
        /* 01xb - invalidate TCEs that match the specified PE# */
-       __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, false);
+       __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb);
        unsigned long val = PHB3_TCE_KILL_INVAL_PE | (pe->pe_number & 0xFF);
  
        mb(); /* Ensure above stores are visible */
        __raw_writeq_be(val, invalidate);
  }
  
- static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm,
+ static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe,
                                        unsigned shift, unsigned long index,
                                        unsigned long npages)
  {
-       __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm);
+       __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb);
        unsigned long start, end, inc;
  
        /* We'll invalidate DMA address in PE scope */
        mb();
  
        while (start <= end) {
-               if (rm)
-                       __raw_rm_writeq_be(start, invalidate);
-               else
-                       __raw_writeq_be(start, invalidate);
+               __raw_writeq_be(start, invalidate);
                start += inc;
        }
  }
@@@ -1408,7 -1397,7 +1398,7 @@@ static inline void pnv_pci_ioda2_tce_in
  }
  
  static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
-               unsigned long index, unsigned long npages, bool rm)
+               unsigned long index, unsigned long npages)
  {
        struct iommu_table_group_link *tgl;
  
                unsigned int shift = tbl->it_page_shift;
  
                if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs)
-                       pnv_pci_phb3_tce_invalidate(pe, rm, shift,
+                       pnv_pci_phb3_tce_invalidate(pe, shift,
                                                    index, npages);
                else
                        opal_pci_tce_kill(phb->opal_id,
@@@ -1438,7 -1427,7 +1428,7 @@@ static int pnv_ioda2_tce_build(struct i
                        attrs);
  
        if (!ret)
-               pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
+               pnv_pci_ioda2_tce_invalidate(tbl, index, npages);
  
        return ret;
  }
@@@ -1448,7 -1437,7 +1438,7 @@@ static void pnv_ioda2_tce_free(struct i
  {
        pnv_tce_free(tbl, index, npages);
  
-       pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
+       pnv_pci_ioda2_tce_invalidate(tbl, index, npages);
  }
  
  static struct iommu_table_ops pnv_ioda2_iommu_ops = {
@@@ -2739,7 -2728,7 +2729,7 @@@ static void pnv_pci_ioda1_release_pe_dm
        if (rc != OPAL_SUCCESS)
                return;
  
-       pnv_pci_p7ioc_tce_invalidate(tbl, tbl->it_offset, tbl->it_size, false);
+       pnv_pci_p7ioc_tce_invalidate(tbl, tbl->it_offset, tbl->it_size);
        if (pe->table_group.group) {
                iommu_group_put(pe->table_group.group);
                WARN_ON(pe->table_group.group);
@@@ -666,8 -666,7 +666,7 @@@ static void pci_dma_bus_setup_pSeries(s
  
  #ifdef CONFIG_IOMMU_API
  static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
-                               long *tce, enum dma_data_direction *direction,
-                               bool realmode)
+                               long *tce, enum dma_data_direction *direction)
  {
        long rc;
        unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
@@@ -1430,7 -1429,7 +1429,7 @@@ static bool enable_ddw(struct pci_dev *
  
                pci->table_group->tables[1] = newtbl;
  
 -              /* Keep default DMA window stuct if removed */
 +              /* Keep default DMA window struct if removed */
                if (default_win_removed) {
                        tbl->it_size = 0;
                        vfree(tbl->it_map);