virt/kvm/arm/vgic/vgic.c

   1 /*
   2  * Copyright (C) 2015, 2016 ARM Ltd.
   3  *
   4  * This program is free software; you can redistribute it and/or modify
   5  * it under the terms of the GNU General Public License version 2 as
   6  * published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope that it will be useful,
   9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11  * GNU General Public License for more details.
  12  *
  13  * You should have received a copy of the GNU General Public License
  14  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  15  */
  16
  17 #include <linux/kvm.h>
  18 #include <linux/kvm_host.h>
  19 #include <linux/list_sort.h>
  20 #include <linux/interrupt.h>
  21 #include <linux/irq.h>
  22 #include <asm/kvm_hyp.h>
  23
  24 #include "vgic.h"
  25
  26 #define CREATE_TRACE_POINTS
  27 #include "trace.h"
  28
  29 #ifdef CONFIG_DEBUG_SPINLOCK
  30 #define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
  31 #else
  32 #define DEBUG_SPINLOCK_BUG_ON(p)
  33 #endif
  34
  35 struct vgic_global kvm_vgic_global_state __ro_after_init = {
  36         .gicv3_cpuif = STATIC_KEY_FALSE_INIT,
  37 };
  38
  39 /*
  40  * Locking order is always:
  41  * kvm->lock (mutex)
  42  *   its->cmd_lock (mutex)
  43  *     its->its_lock (mutex)
  44  *       vgic_cpu->ap_list_lock
  45  *         kvm->lpi_list_lock
  46  *           vgic_irq->irq_lock
  47  *
  48  * If you need to take multiple locks, always take the upper lock first,
  49  * then the lower ones, e.g. first take the its_lock, then the irq_lock.
  50  * If you are already holding a lock and need to take a higher one, you
   51  * have to drop the lower ranking lock first and re-acquire it after having
  52  * taken the upper one.
  53  *
  54  * When taking more than one ap_list_lock at the same time, always take the
  55  * lowest numbered VCPU's ap_list_lock first, so:
  56  *   vcpuX->vcpu_id < vcpuY->vcpu_id:
  57  *     spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
  58  *     spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
  59  *
  60  * Since the VGIC must support injecting virtual interrupts from ISRs, we have
  61  * to use the spin_lock_irqsave/spin_unlock_irqrestore versions of outer
  62  * spinlocks for any lock that may be taken while injecting an interrupt.
  63  */
  64
  65 /*
  66  * Iterate over the VM's list of mapped LPIs to find the one with a
  67  * matching interrupt ID and return a reference to the IRQ structure.
  68  */
  69 static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
  70 {
  71         struct vgic_dist *dist = &kvm->arch.vgic;
  72         struct vgic_irq *irq = NULL;
  73
  74         spin_lock(&dist->lpi_list_lock);
  75
  76         list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
  77                 if (irq->intid != intid)
  78                         continue;
  79
  80                 /*
  81                  * This increases the refcount, the caller is expected to
  82                  * call vgic_put_irq() later once it's finished with the IRQ.
  83                  */
  84                 vgic_get_irq_kref(irq);
  85                 goto out_unlock;
  86         }
  87         irq = NULL;
  88
  89 out_unlock:
  90         spin_unlock(&dist->lpi_list_lock);
  91
  92         return irq;
  93 }
  94
  95 /*
  96  * This looks up the virtual interrupt ID to get the corresponding
  97  * struct vgic_irq. It also increases the refcount, so any caller is expected
  98  * to call vgic_put_irq() once it's finished with this IRQ.
  99  */
 100 struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
 101                               u32 intid)
 102 {
 103         /* SGIs and PPIs */
 104         if (intid <= VGIC_MAX_PRIVATE)
 105                 return &vcpu->arch.vgic_cpu.private_irqs[intid];
 106
 107         /* SPIs */
 108         if (intid <= VGIC_MAX_SPI)
 109                 return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
 110
 111         /* LPIs */
 112         if (intid >= VGIC_MIN_LPI)
 113                 return vgic_get_lpi(kvm, intid);
 114
 115         WARN(1, "Looking up struct vgic_irq for reserved INTID");
 116         return NULL;
 117 }
 118
 119 /*
 120  * We can't do anything in here, because we lack the kvm pointer to
 121  * lock and remove the item from the lpi_list. So we keep this function
 122  * empty and use the return value of kref_put() to trigger the freeing.
 123  */
 124 static void vgic_irq_release(struct kref *ref)
 125 {
 126 }
 127
 128 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
 129 {
 130         struct vgic_dist *dist = &kvm->arch.vgic;
 131
 132         if (irq->intid < VGIC_MIN_LPI)
 133                 return;
 134
 135         spin_lock(&dist->lpi_list_lock);
 136         if (!kref_put(&irq->refcount, vgic_irq_release)) {
 137                 spin_unlock(&dist->lpi_list_lock);
 138                 return;
 139         };
 140
 141         list_del(&irq->lpi_list);
 142         dist->lpi_list_count--;
 143         spin_unlock(&dist->lpi_list_lock);
 144
 145         kfree(irq);
 146 }
 147
 148 void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
 149 {
 150         WARN_ON(irq_set_irqchip_state(irq->host_irq,
 151                                       IRQCHIP_STATE_PENDING,
 152                                       pending));
 153 }
 154
 155 bool vgic_get_phys_line_level(struct vgic_irq *irq)
 156 {
 157         bool line_level;
 158
 159         BUG_ON(!irq->hw);
 160
 161         if (irq->get_input_level)
 162                 return irq->get_input_level(irq->intid);
 163
 164         WARN_ON(irq_get_irqchip_state(irq->host_irq,
 165                                       IRQCHIP_STATE_PENDING,
 166                                       &line_level));
 167         return line_level;
 168 }
 169
 170 /* Set/Clear the physical active state */
 171 void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
 172 {
 173
 174         BUG_ON(!irq->hw);
 175         WARN_ON(irq_set_irqchip_state(irq->host_irq,
 176                                       IRQCHIP_STATE_ACTIVE,
 177                                       active));
 178 }
 179
 180 /**
 181  * kvm_vgic_target_oracle - compute the target vcpu for an irq
 182  *
 183  * @irq:        The irq to route. Must be already locked.
 184  *
 185  * Based on the current state of the interrupt (enabled, pending,
 186  * active, vcpu and target_vcpu), compute the next vcpu this should be
 187  * given to. Return NULL if this shouldn't be injected at all.
 188  *
 189  * Requires the IRQ lock to be held.
 190  */
 191 static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
 192 {
 193         DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
 194
 195         /* If the interrupt is active, it must stay on the current vcpu */
 196         if (irq->active)
 197                 return irq->vcpu ? : irq->target_vcpu;
 198
 199         /*
 200          * If the IRQ is not active but enabled and pending, we should direct
 201          * it to its configured target VCPU.
 202          * If the distributor is disabled, pending interrupts shouldn't be
 203          * forwarded.
 204          */
 205         if (irq->enabled && irq_is_pending(irq)) {
 206                 if (unlikely(irq->target_vcpu &&
 207                              !irq->target_vcpu->kvm->arch.vgic.enabled))
 208                         return NULL;
 209
 210                 return irq->target_vcpu;
 211         }
 212
 213         /* If neither active nor pending and enabled, then this IRQ should not
 214          * be queued to any VCPU.
 215          */
 216         return NULL;
 217 }
 218
 219 /*
 220  * The order of items in the ap_lists defines how we'll pack things in LRs as
 221  * well, the first items in the list being the first things populated in the
 222  * LRs.
 223  *
 224  * A hard rule is that active interrupts can never be pushed out of the LRs
 225  * (and therefore take priority) since we cannot reliably trap on deactivation
 226  * of IRQs and therefore they have to be present in the LRs.
 227  *
 228  * Otherwise things should be sorted by the priority field and the GIC
 229  * hardware support will take care of preemption of priority groups etc.
 230  *
 231  * Return negative if "a" sorts before "b", 0 to preserve order, and positive
 232  * to sort "b" before "a".
 233  */
 234 static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
 235 {
 236         struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
 237         struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
 238         bool penda, pendb;
 239         int ret;
 240
 241         spin_lock(&irqa->irq_lock);
 242         spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
 243
 244         if (irqa->active || irqb->active) {
 245                 ret = (int)irqb->active - (int)irqa->active;
 246                 goto out;
 247         }
 248
 249         penda = irqa->enabled && irq_is_pending(irqa);
 250         pendb = irqb->enabled && irq_is_pending(irqb);
 251
 252         if (!penda || !pendb) {
 253                 ret = (int)pendb - (int)penda;
 254                 goto out;
 255         }
 256
 257         /* Both pending and enabled, sort by priority */
 258         ret = irqa->priority - irqb->priority;
 259 out:
 260         spin_unlock(&irqb->irq_lock);
 261         spin_unlock(&irqa->irq_lock);
 262         return ret;
 263 }
 264
 265 /* Must be called with the ap_list_lock held */
 266 static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
 267 {
 268         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 269
 270         DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
 271
 272         list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
 273 }
 274
 275 /*
 276  * Only valid injection if changing level for level-triggered IRQs or for a
 277  * rising edge, and in-kernel connected IRQ lines can only be controlled by
 278  * their owner.
 279  */
 280 static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
 281 {
 282         if (irq->owner != owner)
 283                 return false;
 284
 285         switch (irq->config) {
 286         case VGIC_CONFIG_LEVEL:
 287                 return irq->line_level != level;
 288         case VGIC_CONFIG_EDGE:
 289                 return level;
 290         }
 291
 292         return false;
 293 }
 294
 295 /*
 296  * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
 297  * Do the queuing if necessary, taking the right locks in the right order.
 298  * Returns true when the IRQ was queued, false otherwise.
 299  *
 300  * Needs to be entered with the IRQ lock already held, but will return
 301  * with all locks dropped.
 302  */
 303 bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
 304                            unsigned long flags)
 305 {
 306         struct kvm_vcpu *vcpu;
 307
 308         DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
 309
 310 retry:
 311         vcpu = vgic_target_oracle(irq);
 312         if (irq->vcpu || !vcpu) {
 313                 /*
 314                  * If this IRQ is already on a VCPU's ap_list, then it
 315                  * cannot be moved or modified and there is no more work for
 316                  * us to do.
 317                  *
 318                  * Otherwise, if the irq is not pending and enabled, it does
 319                  * not need to be inserted into an ap_list and there is also
 320                  * no more work for us to do.
 321                  */
 322                 spin_unlock_irqrestore(&irq->irq_lock, flags);
 323
 324                 /*
 325                  * We have to kick the VCPU here, because we could be
 326                  * queueing an edge-triggered interrupt for which we
 327                  * get no EOI maintenance interrupt. In that case,
 328                  * while the IRQ is already on the VCPU's AP list, the
 329                  * VCPU could have EOI'ed the original interrupt and
 330                  * won't see this one until it exits for some other
 331                  * reason.
 332                  */
 333                 if (vcpu) {
 334                         kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
 335                         kvm_vcpu_kick(vcpu);
 336                 }
 337                 return false;
 338         }
 339
 340         /*
 341          * We must unlock the irq lock to take the ap_list_lock where
 342          * we are going to insert this new pending interrupt.
 343          */
 344         spin_unlock_irqrestore(&irq->irq_lock, flags);
 345
 346         /* someone can do stuff here, which we re-check below */
 347
 348         spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
 349         spin_lock(&irq->irq_lock);
 350
 351         /*
 352          * Did something change behind our backs?
 353          *
 354          * There are two cases:
 355          * 1) The irq lost its pending state or was disabled behind our
 356          *    backs and/or it was queued to another VCPU's ap_list.
 357          * 2) Someone changed the affinity on this irq behind our
 358          *    backs and we are now holding the wrong ap_list_lock.
 359          *
 360          * In both cases, drop the locks and retry.
 361          */
 362
 363         if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
 364                 spin_unlock(&irq->irq_lock);
 365                 spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
 366
 367                 spin_lock_irqsave(&irq->irq_lock, flags);
 368                 goto retry;
 369         }
 370
 371         /*
 372          * Grab a reference to the irq to reflect the fact that it is
 373          * now in the ap_list.
 374          */
 375         vgic_get_irq_kref(irq);
 376         list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
 377         irq->vcpu = vcpu;
 378
 379         spin_unlock(&irq->irq_lock);
 380         spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
 381
 382         kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
 383         kvm_vcpu_kick(vcpu);
 384
 385         return true;
 386 }
 387
 388 /**
 389  * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
 390  * @kvm:     The VM structure pointer
 391  * @cpuid:   The CPU for PPIs
 392  * @intid:   The INTID to inject a new state to.
 393  * @level:   Edge-triggered:  true:  to trigger the interrupt
 394  *                            false: to ignore the call
 395  *           Level-sensitive  true:  raise the input signal
 396  *                            false: lower the input signal
 397  * @owner:   The opaque pointer to the owner of the IRQ being raised to verify
 398  *           that the caller is allowed to inject this IRQ.  Userspace
 399  *           injections will have owner == NULL.
 400  *
 401  * The VGIC is not concerned with devices being active-LOW or active-HIGH for
 402  * level-sensitive interrupts.  You can think of the level parameter as 1
 403  * being HIGH and 0 being LOW and all devices being active-HIGH.
 404  */
 405 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
 406                         bool level, void *owner)
 407 {
 408         struct kvm_vcpu *vcpu;
 409         struct vgic_irq *irq;
 410         unsigned long flags;
 411         int ret;
 412
 413         trace_vgic_update_irq_pending(cpuid, intid, level);
 414
 415         ret = vgic_lazy_init(kvm);
 416         if (ret)
 417                 return ret;
 418
 419         vcpu = kvm_get_vcpu(kvm, cpuid);
 420         if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
 421                 return -EINVAL;
 422
 423         irq = vgic_get_irq(kvm, vcpu, intid);
 424         if (!irq)
 425                 return -EINVAL;
 426
 427         spin_lock_irqsave(&irq->irq_lock, flags);
 428
 429         if (!vgic_validate_injection(irq, level, owner)) {
 430                 /* Nothing to see here, move along... */
 431                 spin_unlock_irqrestore(&irq->irq_lock, flags);
 432                 vgic_put_irq(kvm, irq);
 433                 return 0;
 434         }
 435
 436         if (irq->config == VGIC_CONFIG_LEVEL)
 437                 irq->line_level = level;
 438         else
 439                 irq->pending_latch = true;
 440
 441         vgic_queue_irq_unlock(kvm, irq, flags);
 442         vgic_put_irq(kvm, irq);
 443
 444         return 0;
 445 }
 446
 447 /* @irq->irq_lock must be held */
 448 static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
 449                             unsigned int host_irq,
 450                             bool (*get_input_level)(int vindid))
 451 {
 452         struct irq_desc *desc;
 453         struct irq_data *data;
 454
 455         /*
 456          * Find the physical IRQ number corresponding to @host_irq
 457          */
 458         desc = irq_to_desc(host_irq);
 459         if (!desc) {
 460                 kvm_err("%s: no interrupt descriptor\n", __func__);
 461                 return -EINVAL;
 462         }
 463         data = irq_desc_get_irq_data(desc);
 464         while (data->parent_data)
 465                 data = data->parent_data;
 466
 467         irq->hw = true;
 468         irq->host_irq = host_irq;
 469         irq->hwintid = data->hwirq;
 470         irq->get_input_level = get_input_level;
 471         return 0;
 472 }
 473
 474 /* @irq->irq_lock must be held */
 475 static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
 476 {
 477         irq->hw = false;
 478         irq->hwintid = 0;
 479         irq->get_input_level = NULL;
 480 }
 481
 482 int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
 483                           u32 vintid, bool (*get_input_level)(int vindid))
 484 {
 485         struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 486         unsigned long flags;
 487         int ret;
 488
 489         BUG_ON(!irq);
 490
 491         spin_lock_irqsave(&irq->irq_lock, flags);
 492         ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level);
 493         spin_unlock_irqrestore(&irq->irq_lock, flags);
 494         vgic_put_irq(vcpu->kvm, irq);
 495
 496         return ret;
 497 }
 498
 499 /**
 500  * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
 501  * @vcpu: The VCPU pointer
 502  * @vintid: The INTID of the interrupt
 503  *
 504  * Reset the active and pending states of a mapped interrupt.  Kernel
 505  * subsystems injecting mapped interrupts should reset their interrupt lines
 506  * when we are doing a reset of the VM.
 507  */
 508 void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
 509 {
 510         struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 511         unsigned long flags;
 512
 513         if (!irq->hw)
 514                 goto out;
 515
 516         spin_lock_irqsave(&irq->irq_lock, flags);
 517         irq->active = false;
 518         irq->pending_latch = false;
 519         irq->line_level = false;
 520         spin_unlock_irqrestore(&irq->irq_lock, flags);
 521 out:
 522         vgic_put_irq(vcpu->kvm, irq);
 523 }
 524
 525 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
 526 {
 527         struct vgic_irq *irq;
 528         unsigned long flags;
 529
 530         if (!vgic_initialized(vcpu->kvm))
 531                 return -EAGAIN;
 532
 533         irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 534         BUG_ON(!irq);
 535
 536         spin_lock_irqsave(&irq->irq_lock, flags);
 537         kvm_vgic_unmap_irq(irq);
 538         spin_unlock_irqrestore(&irq->irq_lock, flags);
 539         vgic_put_irq(vcpu->kvm, irq);
 540
 541         return 0;
 542 }
 543
 544 /**
 545  * kvm_vgic_set_owner - Set the owner of an interrupt for a VM
 546  *
 547  * @vcpu:   Pointer to the VCPU (used for PPIs)
 548  * @intid:  The virtual INTID identifying the interrupt (PPI or SPI)
 549  * @owner:  Opaque pointer to the owner
 550  *
 551  * Returns 0 if intid is not already used by another in-kernel device and the
 552  * owner is set, otherwise returns an error code.
 553  */
 554 int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
 555 {
 556         struct vgic_irq *irq;
 557         unsigned long flags;
 558         int ret = 0;
 559
 560         if (!vgic_initialized(vcpu->kvm))
 561                 return -EAGAIN;
 562
 563         /* SGIs and LPIs cannot be wired up to any device */
 564         if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
 565                 return -EINVAL;
 566
 567         irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
 568         spin_lock_irqsave(&irq->irq_lock, flags);
 569         if (irq->owner && irq->owner != owner)
 570                 ret = -EEXIST;
 571         else
 572                 irq->owner = owner;
 573         spin_unlock_irqrestore(&irq->irq_lock, flags);
 574
 575         return ret;
 576 }
 577
 578 /**
 579  * vgic_prune_ap_list - Remove non-relevant interrupts from the list
 580  *
 581  * @vcpu: The VCPU pointer
 582  *
 583  * Go over the list of "interesting" interrupts, and prune those that we
 584  * won't have to consider in the near future.
 585  */
 586 static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
 587 {
 588         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 589         struct vgic_irq *irq, *tmp;
 590         unsigned long flags;
 591
 592 retry:
 593         spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
 594
 595         list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
 596                 struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
 597
 598                 spin_lock(&irq->irq_lock);
 599
 600                 BUG_ON(vcpu != irq->vcpu);
 601
 602                 target_vcpu = vgic_target_oracle(irq);
 603
 604                 if (!target_vcpu) {
 605                         /*
 606                          * We don't need to process this interrupt any
 607                          * further, move it off the list.
 608                          */
 609                         list_del(&irq->ap_list);
 610                         irq->vcpu = NULL;
 611                         spin_unlock(&irq->irq_lock);
 612
 613                         /*
 614                          * This vgic_put_irq call matches the
 615                          * vgic_get_irq_kref in vgic_queue_irq_unlock,
 616                          * where we added the LPI to the ap_list. As
 617                          * we remove the irq from the list, we drop
 618                          * also drop the refcount.
 619                          */
 620                         vgic_put_irq(vcpu->kvm, irq);
 621                         continue;
 622                 }
 623
 624                 if (target_vcpu == vcpu) {
 625                         /* We're on the right CPU */
 626                         spin_unlock(&irq->irq_lock);
 627                         continue;
 628                 }
 629
 630                 /* This interrupt looks like it has to be migrated. */
 631
 632                 spin_unlock(&irq->irq_lock);
 633                 spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
 634
 635                 /*
 636                  * Ensure locking order by always locking the smallest
 637                  * ID first.
 638                  */
 639                 if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
 640                         vcpuA = vcpu;
 641                         vcpuB = target_vcpu;
 642                 } else {
 643                         vcpuA = target_vcpu;
 644                         vcpuB = vcpu;
 645                 }
 646
 647                 spin_lock_irqsave(&vcpuA->arch.vgic_cpu.ap_list_lock, flags);
 648                 spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
 649                                  SINGLE_DEPTH_NESTING);
 650                 spin_lock(&irq->irq_lock);
 651
 652                 /*
 653                  * If the affinity has been preserved, move the
 654                  * interrupt around. Otherwise, it means things have
 655                  * changed while the interrupt was unlocked, and we
 656                  * need to replay this.
 657                  *
 658                  * In all cases, we cannot trust the list not to have
 659                  * changed, so we restart from the beginning.
 660                  */
 661                 if (target_vcpu == vgic_target_oracle(irq)) {
 662                         struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;
 663
 664                         list_del(&irq->ap_list);
 665                         irq->vcpu = target_vcpu;
 666                         list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
 667                 }
 668
 669                 spin_unlock(&irq->irq_lock);
 670                 spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
 671                 spin_unlock_irqrestore(&vcpuA->arch.vgic_cpu.ap_list_lock, flags);
 672                 goto retry;
 673         }
 674
 675         spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
 676 }
 677
 678 static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
 679 {
 680         if (kvm_vgic_global_state.type == VGIC_V2)
 681                 vgic_v2_fold_lr_state(vcpu);
 682         else
 683                 vgic_v3_fold_lr_state(vcpu);
 684 }
 685
 686 /* Requires the irq_lock to be held. */
 687 static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
 688                                     struct vgic_irq *irq, int lr)
 689 {
 690         DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
 691
 692         if (kvm_vgic_global_state.type == VGIC_V2)
 693                 vgic_v2_populate_lr(vcpu, irq, lr);
 694         else
 695                 vgic_v3_populate_lr(vcpu, irq, lr);
 696 }
 697
 698 static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
 699 {
 700         if (kvm_vgic_global_state.type == VGIC_V2)
 701                 vgic_v2_clear_lr(vcpu, lr);
 702         else
 703                 vgic_v3_clear_lr(vcpu, lr);
 704 }
 705
 706 static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
 707 {
 708         if (kvm_vgic_global_state.type == VGIC_V2)
 709                 vgic_v2_set_underflow(vcpu);
 710         else
 711                 vgic_v3_set_underflow(vcpu);
 712 }
 713
 714 static inline void vgic_set_npie(struct kvm_vcpu *vcpu)
 715 {
 716         if (kvm_vgic_global_state.type == VGIC_V2)
 717                 vgic_v2_set_npie(vcpu);
 718         else
 719                 vgic_v3_set_npie(vcpu);
 720 }
 721
 722 /* Requires the ap_list_lock to be held. */
 723 static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
 724                                  bool *multi_sgi)
 725 {
 726         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 727         struct vgic_irq *irq;
 728         int count = 0;
 729
 730         *multi_sgi = false;
 731
 732         DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
 733
 734         list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
 735                 spin_lock(&irq->irq_lock);
 736                 /* GICv2 SGIs can count for more than one... */
 737                 if (vgic_irq_is_sgi(irq->intid) && irq->source) {
 738                         int w = hweight8(irq->source);
 739
 740                         count += w;
 741                         *multi_sgi |= (w > 1);
 742                 } else {
 743                         count++;
 744                 }
 745                 spin_unlock(&irq->irq_lock);
 746         }
 747         return count;
 748 }
 749
 750 /* Requires the VCPU's ap_list_lock to be held. */
 751 static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
 752 {
 753         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 754         struct vgic_irq *irq;
 755         int count;
 756         bool npie = false;
 757         bool multi_sgi;
 758         u8 prio = 0xff;
 759
 760         DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
 761
 762         count = compute_ap_list_depth(vcpu, &multi_sgi);
 763         if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
 764                 vgic_sort_ap_list(vcpu);
 765
 766         count = 0;
 767
 768         list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
 769                 spin_lock(&irq->irq_lock);
 770
 771                 /*
 772                  * If we have multi-SGIs in the pipeline, we need to
 773                  * guarantee that they are all seen before any IRQ of
 774                  * lower priority. In that case, we need to filter out
 775                  * these interrupts by exiting early. This is easy as
 776                  * the AP list has been sorted already.
 777                  */
 778                 if (multi_sgi && irq->priority > prio) {
 779                         spin_unlock(&irq->irq_lock);
 780                         break;
 781                 }
 782
 783                 if (likely(vgic_target_oracle(irq) == vcpu)) {
 784                         vgic_populate_lr(vcpu, irq, count++);
 785
 786                         if (irq->source) {
 787                                 npie = true;
 788                                 prio = irq->priority;
 789                         }
 790                 }
 791
 792                 spin_unlock(&irq->irq_lock);
 793
 794                 if (count == kvm_vgic_global_state.nr_lr) {
 795                         if (!list_is_last(&irq->ap_list,
 796                                           &vgic_cpu->ap_list_head))
 797                                 vgic_set_underflow(vcpu);
 798                         break;
 799                 }
 800         }
 801
 802         if (npie)
 803                 vgic_set_npie(vcpu);
 804
 805         vcpu->arch.vgic_cpu.used_lrs = count;
 806
 807         /* Nuke remaining LRs */
 808         for ( ; count < kvm_vgic_global_state.nr_lr; count++)
 809                 vgic_clear_lr(vcpu, count);
 810 }
 811
 812 static inline bool can_access_vgic_from_kernel(void)
 813 {
 814         /*
 815          * GICv2 can always be accessed from the kernel because it is
 816          * memory-mapped, and VHE systems can access GICv3 EL2 system
 817          * registers.
 818          */
 819         return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
 820 }
 821
 822 static inline void vgic_save_state(struct kvm_vcpu *vcpu)
 823 {
 824         if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
 825                 vgic_v2_save_state(vcpu);
 826         else
 827                 __vgic_v3_save_state(vcpu);
 828 }
 829
 830 /* Sync back the hardware VGIC state into our emulation after a guest's run. */
 831 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 832 {
 833         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 834
 835         WARN_ON(vgic_v4_sync_hwstate(vcpu));
 836
 837         /* An empty ap_list_head implies used_lrs == 0 */
 838         if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
 839                 return;
 840
 841         if (can_access_vgic_from_kernel())
 842                 vgic_save_state(vcpu);
 843
 844         if (vgic_cpu->used_lrs)
 845                 vgic_fold_lr_state(vcpu);
 846         vgic_prune_ap_list(vcpu);
 847 }
 848
 849 static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
 850 {
 851         if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
 852                 vgic_v2_restore_state(vcpu);
 853         else
 854                 __vgic_v3_restore_state(vcpu);
 855 }
 856
 857 /* Flush our emulation state into the GIC hardware before entering the guest. */
 858 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 859 {
 860         WARN_ON(vgic_v4_flush_hwstate(vcpu));
 861
 862         /*
 863          * If there are no virtual interrupts active or pending for this
 864          * VCPU, then there is no work to do and we can bail out without
 865          * taking any lock.  There is a potential race with someone injecting
 866          * interrupts to the VCPU, but it is a benign race as the VCPU will
 867          * either observe the new interrupt before or after doing this check,
 868          * and introducing additional synchronization mechanism doesn't change
 869          * this.
 870          */
 871         if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
 872                 return;
 873
 874         DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
 875
 876         spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
 877         vgic_flush_lr_state(vcpu);
 878         spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
 879
 880         if (can_access_vgic_from_kernel())
 881                 vgic_restore_state(vcpu);
 882 }
 883
 884 void kvm_vgic_load(struct kvm_vcpu *vcpu)
 885 {
 886         if (unlikely(!vgic_initialized(vcpu->kvm)))
 887                 return;
 888
 889         if (kvm_vgic_global_state.type == VGIC_V2)
 890                 vgic_v2_load(vcpu);
 891         else
 892                 vgic_v3_load(vcpu);
 893 }
 894
 895 void kvm_vgic_put(struct kvm_vcpu *vcpu)
 896 {
 897         if (unlikely(!vgic_initialized(vcpu->kvm)))
 898                 return;
 899
 900         if (kvm_vgic_global_state.type == VGIC_V2)
 901                 vgic_v2_put(vcpu);
 902         else
 903                 vgic_v3_put(vcpu);
 904 }
 905
 906 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 907 {
 908         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 909         struct vgic_irq *irq;
 910         bool pending = false;
 911         unsigned long flags;
 912
 913         if (!vcpu->kvm->arch.vgic.enabled)
 914                 return false;
 915
 916         if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
 917                 return true;
 918
 919         spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
 920
 921         list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
 922                 spin_lock(&irq->irq_lock);
 923                 pending = irq_is_pending(irq) && irq->enabled;
 924                 spin_unlock(&irq->irq_lock);
 925
 926                 if (pending)
 927                         break;
 928         }
 929
 930         spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
 931
 932         return pending;
 933 }
 934
 935 void vgic_kick_vcpus(struct kvm *kvm)
 936 {
 937         struct kvm_vcpu *vcpu;
 938         int c;
 939
 940         /*
 941          * We've injected an interrupt, time to find out who deserves
 942          * a good kick...
 943          */
 944         kvm_for_each_vcpu(c, vcpu, kvm) {
 945                 if (kvm_vgic_vcpu_pending_irq(vcpu)) {
 946                         kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
 947                         kvm_vcpu_kick(vcpu);
 948                 }
 949         }
 950 }
 951
 952 bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
 953 {
 954         struct vgic_irq *irq;
 955         bool map_is_active;
 956         unsigned long flags;
 957
 958         if (!vgic_initialized(vcpu->kvm))
 959                 return false;
 960
 961         irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 962         spin_lock_irqsave(&irq->irq_lock, flags);
 963         map_is_active = irq->hw && irq->active;
 964         spin_unlock_irqrestore(&irq->irq_lock, flags);
 965         vgic_put_irq(vcpu->kvm, irq);
 966
 967         return map_is_active;
 968 }
 969