Merge remote-tracking branch 'spi/for-5.14' into spi-next
[linux-2.6-microblaze.git] / arch / x86 / kvm / vmx / posted_intr.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/kvm_host.h>
3
4 #include <asm/irq_remapping.h>
5 #include <asm/cpu.h>
6
7 #include "lapic.h"
8 #include "posted_intr.h"
9 #include "trace.h"
10 #include "vmx.h"
11
/*
 * We maintain a per-CPU linked list of blocked vCPUs, so that
 * pi_wakeup_handler() can find which vCPU should be woken up.  Each
 * list is only walked or modified in this file while holding the
 * blocked_vcpu_on_cpu_lock of the same CPU.
 */
static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
18
19 static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
20 {
21         return &(to_vmx(vcpu)->pi_desc);
22 }
23
24 void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
25 {
26         struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
27         struct pi_desc old, new;
28         unsigned int dest;
29
30         /*
31          * In case of hot-plug or hot-unplug, we may have to undo
32          * vmx_vcpu_pi_put even if there is no assigned device.  And we
33          * always keep PI.NDST up to date for simplicity: it makes the
34          * code easier, and CPU migration is not a fast path.
35          */
36         if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
37                 return;
38
39         /*
40          * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
41          * PI.NDST: pi_post_block is the one expected to change PID.NDST and the
42          * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
43          * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up
44          * correctly.
45          */
46         if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
47                 pi_clear_sn(pi_desc);
48                 goto after_clear_sn;
49         }
50
51         /* The full case.  */
52         do {
53                 old.control = new.control = pi_desc->control;
54
55                 dest = cpu_physical_id(cpu);
56
57                 if (x2apic_mode)
58                         new.ndst = dest;
59                 else
60                         new.ndst = (dest << 8) & 0xFF00;
61
62                 new.sn = 0;
63         } while (cmpxchg64(&pi_desc->control, old.control,
64                            new.control) != old.control);
65
66 after_clear_sn:
67
68         /*
69          * Clear SN before reading the bitmap.  The VT-d firmware
70          * writes the bitmap and reads SN atomically (5.2.3 in the
71          * spec), so it doesn't really have a memory barrier that
72          * pairs with this, but we cannot do that and we need one.
73          */
74         smp_mb__after_atomic();
75
76         if (!pi_is_pir_empty(pi_desc))
77                 pi_set_on(pi_desc);
78 }
79
80 void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
81 {
82         struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
83
84         if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
85                 !irq_remapping_cap(IRQ_POSTING_CAP)  ||
86                 !kvm_vcpu_apicv_active(vcpu))
87                 return;
88
89         /* Set SN when the vCPU is preempted */
90         if (vcpu->preempted)
91                 pi_set_sn(pi_desc);
92 }
93
94 static void __pi_post_block(struct kvm_vcpu *vcpu)
95 {
96         struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
97         struct pi_desc old, new;
98         unsigned int dest;
99
100         do {
101                 old.control = new.control = pi_desc->control;
102                 WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
103                      "Wakeup handler not enabled while the VCPU is blocked\n");
104
105                 dest = cpu_physical_id(vcpu->cpu);
106
107                 if (x2apic_mode)
108                         new.ndst = dest;
109                 else
110                         new.ndst = (dest << 8) & 0xFF00;
111
112                 /* set 'NV' to 'notification vector' */
113                 new.nv = POSTED_INTR_VECTOR;
114         } while (cmpxchg64(&pi_desc->control, old.control,
115                            new.control) != old.control);
116
117         if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
118                 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
119                 list_del(&vcpu->blocked_vcpu_list);
120                 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
121                 vcpu->pre_pcpu = -1;
122         }
123 }
124
125 /*
126  * This routine does the following things for vCPU which is going
127  * to be blocked if VT-d PI is enabled.
128  * - Store the vCPU to the wakeup list, so when interrupts happen
129  *   we can find the right vCPU to wake up.
130  * - Change the Posted-interrupt descriptor as below:
131  *      'NDST' <-- vcpu->pre_pcpu
132  *      'NV' <-- POSTED_INTR_WAKEUP_VECTOR
133  * - If 'ON' is set during this process, which means at least one
134  *   interrupt is posted for this vCPU, we cannot block it, in
135  *   this case, return 1, otherwise, return 0.
136  *
137  */
138 int pi_pre_block(struct kvm_vcpu *vcpu)
139 {
140         unsigned int dest;
141         struct pi_desc old, new;
142         struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
143
144         if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
145                 !irq_remapping_cap(IRQ_POSTING_CAP)  ||
146                 !kvm_vcpu_apicv_active(vcpu))
147                 return 0;
148
149         WARN_ON(irqs_disabled());
150         local_irq_disable();
151         if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
152                 vcpu->pre_pcpu = vcpu->cpu;
153                 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
154                 list_add_tail(&vcpu->blocked_vcpu_list,
155                               &per_cpu(blocked_vcpu_on_cpu,
156                                        vcpu->pre_pcpu));
157                 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
158         }
159
160         do {
161                 old.control = new.control = pi_desc->control;
162
163                 WARN((pi_desc->sn == 1),
164                      "Warning: SN field of posted-interrupts "
165                      "is set before blocking\n");
166
167                 /*
168                  * Since vCPU can be preempted during this process,
169                  * vcpu->cpu could be different with pre_pcpu, we
170                  * need to set pre_pcpu as the destination of wakeup
171                  * notification event, then we can find the right vCPU
172                  * to wakeup in wakeup handler if interrupts happen
173                  * when the vCPU is in blocked state.
174                  */
175                 dest = cpu_physical_id(vcpu->pre_pcpu);
176
177                 if (x2apic_mode)
178                         new.ndst = dest;
179                 else
180                         new.ndst = (dest << 8) & 0xFF00;
181
182                 /* set 'NV' to 'wakeup vector' */
183                 new.nv = POSTED_INTR_WAKEUP_VECTOR;
184         } while (cmpxchg64(&pi_desc->control, old.control,
185                            new.control) != old.control);
186
187         /* We should not block the vCPU if an interrupt is posted for it.  */
188         if (pi_test_on(pi_desc) == 1)
189                 __pi_post_block(vcpu);
190
191         local_irq_enable();
192         return (vcpu->pre_pcpu == -1);
193 }
194
195 void pi_post_block(struct kvm_vcpu *vcpu)
196 {
197         if (vcpu->pre_pcpu == -1)
198                 return;
199
200         WARN_ON(irqs_disabled());
201         local_irq_disable();
202         __pi_post_block(vcpu);
203         local_irq_enable();
204 }
205
206 /*
207  * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
208  */
209 void pi_wakeup_handler(void)
210 {
211         struct kvm_vcpu *vcpu;
212         int cpu = smp_processor_id();
213
214         spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
215         list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
216                         blocked_vcpu_list) {
217                 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
218
219                 if (pi_test_on(pi_desc) == 1)
220                         kvm_vcpu_kick(vcpu);
221         }
222         spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
223 }
224
225 void __init pi_init_cpu(int cpu)
226 {
227         INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
228         spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
229 }
230
231 bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
232 {
233         struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
234
235         return pi_test_on(pi_desc) ||
236                 (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
237 }
238
239
240 /*
241  * Bail out of the block loop if the VM has an assigned
242  * device, but the blocking vCPU didn't reconfigure the
243  * PI.NV to the wakeup vector, i.e. the assigned device
244  * came along after the initial check in pi_pre_block().
245  */
246 void vmx_pi_start_assignment(struct kvm *kvm)
247 {
248         if (!irq_remapping_cap(IRQ_POSTING_CAP))
249                 return;
250
251         kvm_make_all_cpus_request(kvm, KVM_REQ_UNBLOCK);
252 }
253
254 /*
255  * pi_update_irte - set IRTE for Posted-Interrupts
256  *
257  * @kvm: kvm
258  * @host_irq: host irq of the interrupt
259  * @guest_irq: gsi of the interrupt
260  * @set: set or unset PI
261  * returns 0 on success, < 0 on failure
262  */
263 int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
264                    bool set)
265 {
266         struct kvm_kernel_irq_routing_entry *e;
267         struct kvm_irq_routing_table *irq_rt;
268         struct kvm_lapic_irq irq;
269         struct kvm_vcpu *vcpu;
270         struct vcpu_data vcpu_info;
271         int idx, ret = 0;
272
273         if (!kvm_arch_has_assigned_device(kvm) ||
274             !irq_remapping_cap(IRQ_POSTING_CAP) ||
275             !kvm_vcpu_apicv_active(kvm->vcpus[0]))
276                 return 0;
277
278         idx = srcu_read_lock(&kvm->irq_srcu);
279         irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
280         if (guest_irq >= irq_rt->nr_rt_entries ||
281             hlist_empty(&irq_rt->map[guest_irq])) {
282                 pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
283                              guest_irq, irq_rt->nr_rt_entries);
284                 goto out;
285         }
286
287         hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
288                 if (e->type != KVM_IRQ_ROUTING_MSI)
289                         continue;
290                 /*
291                  * VT-d PI cannot support posting multicast/broadcast
292                  * interrupts to a vCPU, we still use interrupt remapping
293                  * for these kind of interrupts.
294                  *
295                  * For lowest-priority interrupts, we only support
296                  * those with single CPU as the destination, e.g. user
297                  * configures the interrupts via /proc/irq or uses
298                  * irqbalance to make the interrupts single-CPU.
299                  *
300                  * We will support full lowest-priority interrupt later.
301                  *
302                  * In addition, we can only inject generic interrupts using
303                  * the PI mechanism, refuse to route others through it.
304                  */
305
306                 kvm_set_msi_irq(kvm, e, &irq);
307                 if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
308                     !kvm_irq_is_postable(&irq)) {
309                         /*
310                          * Make sure the IRTE is in remapped mode if
311                          * we don't handle it in posted mode.
312                          */
313                         ret = irq_set_vcpu_affinity(host_irq, NULL);
314                         if (ret < 0) {
315                                 printk(KERN_INFO
316                                    "failed to back to remapped mode, irq: %u\n",
317                                    host_irq);
318                                 goto out;
319                         }
320
321                         continue;
322                 }
323
324                 vcpu_info.pi_desc_addr = __pa(&to_vmx(vcpu)->pi_desc);
325                 vcpu_info.vector = irq.vector;
326
327                 trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
328                                 vcpu_info.vector, vcpu_info.pi_desc_addr, set);
329
330                 if (set)
331                         ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
332                 else
333                         ret = irq_set_vcpu_affinity(host_irq, NULL);
334
335                 if (ret < 0) {
336                         printk(KERN_INFO "%s: failed to update PI IRTE\n",
337                                         __func__);
338                         goto out;
339                 }
340         }
341
342         ret = 0;
343 out:
344         srcu_read_unlock(&kvm->irq_srcu, idx);
345         return ret;
346 }