enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
 
 extern int powersave_nap;      /* set if nap mode can be used in idle loop */
-extern void power7_nap(int check_irq);
+extern unsigned long power7_nap(int check_irq);
 extern void power7_sleep(void);
 extern void flush_instruction_cache(void);
 extern void hard_reset_now(void);
 
        mtspr   SPRN_SRR0,r5
        rfid
 
+/*
+ * R3 here contains the value that will be returned to the caller
+ * of power7_nap.
+ */
 _GLOBAL(power7_wakeup_loss)
        ld      r1,PACAR1(r13)
 BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
        REST_NVGPRS(r1)
        REST_GPR(2, r1)
-       ld      r3,_CCR(r1)
+       ld      r6,_CCR(r1)
        ld      r4,_MSR(r1)
        ld      r5,_NIP(r1)
        addi    r1,r1,INT_FRAME_SIZE
-       mtcr    r3
+       mtcr    r6
        mtspr   SPRN_SRR1,r4
        mtspr   SPRN_SRR0,r5
        rfid
 
+/*
+ * R3 here contains the value that will be returned to the caller
+ * of power7_nap.
+ */
 _GLOBAL(power7_wakeup_noloss)
        lbz     r0,PACA_NAPSTATELOST(r13)
        cmpwi   r0,0
 
        bge     kvm_novcpu_exit /* another thread already exiting */
        li      r3, NAPPING_NOVCPU
        stb     r3, HSTATE_NAPPING(r13)
-       li      r3, 1
-       stb     r3, HSTATE_HWTHREAD_REQ(r13)
 
        b       kvm_do_nap
 
        /* if we have no vcpu to run, go back to sleep */
        beq     kvm_no_guest
 
+kvm_secondary_got_guest:
+
        /* Set HSTATE_DSCR(r13) to something sensible */
        ld      r6, PACA_DSCR(r13)
        std     r6, HSTATE_DSCR(r13)
        stwcx.  r3, 0, r4
        bne     51b
 
+/*
+ * At this point we have finished executing in the guest.
+ * We need to wait for hwthread_req to become zero, since
+ * we may not turn on the MMU while hwthread_req is non-zero.
+ * While waiting, also check whether we have been given a vcpu to run.
+ */
 kvm_no_guest:
-       li      r0, KVM_HWTHREAD_IN_NAP
+       lbz     r3, HSTATE_HWTHREAD_REQ(r13)
+       cmpwi   r3, 0
+       bne     53f
+       HMT_MEDIUM
+       li      r0, KVM_HWTHREAD_IN_KERNEL
        stb     r0, HSTATE_HWTHREAD_STATE(r13)
-kvm_do_nap:
-       /* Clear the runlatch bit before napping */
-       mfspr   r2, SPRN_CTRLF
-       clrrdi  r2, r2, 1
-       mtspr   SPRN_CTRLT, r2
-
+       /* need to recheck hwthread_req after a barrier, to avoid a race */
+       sync
+       lbz     r3, HSTATE_HWTHREAD_REQ(r13)
+       cmpwi   r3, 0
+       bne     54f
+/*
+ * We jump to power7_wakeup_loss, which will return to the caller
+ * of power7_nap in the powernv cpu offline loop.  The value we
+ * put in r3 becomes the return value for power7_nap.
+ */
        li      r3, LPCR_PECE0
        mfspr   r4, SPRN_LPCR
        rlwimi  r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
        mtspr   SPRN_LPCR, r4
-       isync
-       std     r0, HSTATE_SCRATCH0(r13)
-       ptesync
-       ld      r0, HSTATE_SCRATCH0(r13)
-1:     cmpd    r0, r0
-       bne     1b
-       nap
-       b       .
+       li      r3, 0
+       b       power7_wakeup_loss
+
+53:    HMT_LOW
+       ld      r4, HSTATE_KVM_VCPU(r13)
+       cmpdi   r4, 0
+       beq     kvm_no_guest
+       HMT_MEDIUM
+       b       kvm_secondary_got_guest
+
+54:    li      r0, KVM_HWTHREAD_IN_KVM
+       stb     r0, HSTATE_HWTHREAD_STATE(r13)
+       b       kvm_no_guest
 
 /******************************************************************************
  *                                                                            *
         * occurs, with PECE1, PECE0 and PECEDP set in LPCR. Also clear the
         * runlatch bit before napping.
         */
+kvm_do_nap:
        mfspr   r2, SPRN_CTRLF
        clrrdi  r2, r2, 1
        mtspr   SPRN_CTRLT, r2
 
 static void pnv_smp_cpu_kill_self(void)
 {
        unsigned int cpu;
+       unsigned long srr1;
 
        /* Standard hot unplug procedure */
        local_irq_disable();
        mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
        while (!generic_check_cpu_restart(cpu)) {
                ppc64_runlatch_off();
-               power7_nap(1);
+               srr1 = power7_nap(1);
                ppc64_runlatch_on();
 
-               /* Clear the IPI that woke us up */
-               icp_native_flush_interrupt();
-               local_paca->irq_happened &= PACA_IRQ_HARD_DIS;
-               mb();
+               /*
+                * If the SRR1 value indicates that we woke up due to
+                * an external interrupt, then clear the interrupt.
+                * We clear the interrupt before checking for the
+                * reason, so as to avoid a race where we wake up for
+                * some other reason, find nothing and clear the interrupt
+                * just as some other cpu is sending us an interrupt.
+                * If we returned from power7_nap as a result of
+                * having finished executing in a KVM guest, then srr1
+                * contains 0.
+                */
+               if ((srr1 & SRR1_WAKEMASK) == SRR1_WAKEEE) {
+                       icp_native_flush_interrupt();
+                       local_paca->irq_happened &= PACA_IRQ_HARD_DIS;
+                       smp_mb();
+               }
 
                if (cpu_core_split_required())
                        continue;