9d4f73c4dc02aa0644ce64559a518ae650f12377
[linux-2.6-microblaze.git] / arch / x86 / kvm / lapic.c
1 // SPDX-License-Identifier: GPL-2.0-only
2
3 /*
4  * Local APIC virtualization
5  *
6  * Copyright (C) 2006 Qumranet, Inc.
7  * Copyright (C) 2007 Novell
8  * Copyright (C) 2007 Intel
9  * Copyright 2009 Red Hat, Inc. and/or its affiliates.
10  *
11  * Authors:
12  *   Dor Laor <dor.laor@qumranet.com>
13  *   Gregory Haskins <ghaskins@novell.com>
14  *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
15  *
16  * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
17  */
18
19 #include <linux/kvm_host.h>
20 #include <linux/kvm.h>
21 #include <linux/mm.h>
22 #include <linux/highmem.h>
23 #include <linux/smp.h>
24 #include <linux/hrtimer.h>
25 #include <linux/io.h>
26 #include <linux/export.h>
27 #include <linux/math64.h>
28 #include <linux/slab.h>
29 #include <asm/processor.h>
30 #include <asm/mce.h>
31 #include <asm/msr.h>
32 #include <asm/page.h>
33 #include <asm/current.h>
34 #include <asm/apicdef.h>
35 #include <asm/delay.h>
36 #include <linux/atomic.h>
37 #include <linux/jump_label.h>
38 #include "kvm_cache_regs.h"
39 #include "irq.h"
40 #include "ioapic.h"
41 #include "trace.h"
42 #include "x86.h"
43 #include "cpuid.h"
44 #include "hyperv.h"
45
46 #ifndef CONFIG_X86_64
47 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
48 #else
49 #define mod_64(x, y) ((x) % (y))
50 #endif
51
52 #define PRId64 "d"
53 #define PRIx64 "llx"
54 #define PRIu64 "u"
55 #define PRIo64 "o"
56
57 /* 14 is the version for Xeon and Pentium 8.4.8*/
58 #define APIC_VERSION                    0x14UL
59 #define LAPIC_MMIO_LENGTH               (1 << 12)
60 /* followed define is not in apicdef.h */
61 #define MAX_APIC_VECTOR                 256
62 #define APIC_VECTORS_PER_REG            32
63
64 static bool lapic_timer_advance_dynamic __read_mostly;
65 #define LAPIC_TIMER_ADVANCE_ADJUST_MIN  100     /* clock cycles */
66 #define LAPIC_TIMER_ADVANCE_ADJUST_MAX  10000   /* clock cycles */
67 #define LAPIC_TIMER_ADVANCE_NS_INIT     1000
68 #define LAPIC_TIMER_ADVANCE_NS_MAX     5000
69 /* step-by-step approximation to mitigate fluctuation */
70 #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
71 static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data);
72
73 static inline void __kvm_lapic_set_reg(char *regs, int reg_off, u32 val)
74 {
75         *((u32 *) (regs + reg_off)) = val;
76 }
77
78 static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
79 {
80         __kvm_lapic_set_reg(apic->regs, reg_off, val);
81 }
82
83 static __always_inline u64 __kvm_lapic_get_reg64(char *regs, int reg)
84 {
85         BUILD_BUG_ON(reg != APIC_ICR);
86         return *((u64 *) (regs + reg));
87 }
88
89 static __always_inline u64 kvm_lapic_get_reg64(struct kvm_lapic *apic, int reg)
90 {
91         return __kvm_lapic_get_reg64(apic->regs, reg);
92 }
93
94 static __always_inline void __kvm_lapic_set_reg64(char *regs, int reg, u64 val)
95 {
96         BUILD_BUG_ON(reg != APIC_ICR);
97         *((u64 *) (regs + reg)) = val;
98 }
99
100 static __always_inline void kvm_lapic_set_reg64(struct kvm_lapic *apic,
101                                                 int reg, u64 val)
102 {
103         __kvm_lapic_set_reg64(apic->regs, reg, val);
104 }
105
106 static inline int apic_test_vector(int vec, void *bitmap)
107 {
108         return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
109 }
110
111 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
112 {
113         struct kvm_lapic *apic = vcpu->arch.apic;
114
115         return apic_test_vector(vector, apic->regs + APIC_ISR) ||
116                 apic_test_vector(vector, apic->regs + APIC_IRR);
117 }
118
119 static inline int __apic_test_and_set_vector(int vec, void *bitmap)
120 {
121         return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
122 }
123
124 static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
125 {
126         return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
127 }
128
129 __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
130 __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);
131
132 static inline int apic_enabled(struct kvm_lapic *apic)
133 {
134         return kvm_apic_sw_enabled(apic) &&     kvm_apic_hw_enabled(apic);
135 }
136
137 #define LVT_MASK        \
138         (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
139
140 #define LINT_MASK       \
141         (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
142          APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
143
144 static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
145 {
146         return apic->vcpu->vcpu_id;
147 }
148
149 static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
150 {
151         return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
152                 (kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
153 }
154
155 bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
156 {
157         return kvm_x86_ops.set_hv_timer
158                && !(kvm_mwait_in_guest(vcpu->kvm) ||
159                     kvm_can_post_timer_interrupt(vcpu));
160 }
161 EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer);
162
163 static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
164 {
165         return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
166 }
167
168 static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
169                 u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
170         switch (map->mode) {
171         case KVM_APIC_MODE_X2APIC: {
172                 u32 offset = (dest_id >> 16) * 16;
173                 u32 max_apic_id = map->max_apic_id;
174
175                 if (offset <= max_apic_id) {
176                         u8 cluster_size = min(max_apic_id - offset + 1, 16U);
177
178                         offset = array_index_nospec(offset, map->max_apic_id + 1);
179                         *cluster = &map->phys_map[offset];
180                         *mask = dest_id & (0xffff >> (16 - cluster_size));
181                 } else {
182                         *mask = 0;
183                 }
184
185                 return true;
186                 }
187         case KVM_APIC_MODE_XAPIC_FLAT:
188                 *cluster = map->xapic_flat_map;
189                 *mask = dest_id & 0xff;
190                 return true;
191         case KVM_APIC_MODE_XAPIC_CLUSTER:
192                 *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
193                 *mask = dest_id & 0xf;
194                 return true;
195         default:
196                 /* Not optimized. */
197                 return false;
198         }
199 }
200
201 static void kvm_apic_map_free(struct rcu_head *rcu)
202 {
203         struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
204
205         kvfree(map);
206 }
207
208 /*
209  * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
210  *
211  * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with
212  * apic_map_lock_held.
213  */
214 enum {
215         CLEAN,
216         UPDATE_IN_PROGRESS,
217         DIRTY
218 };
219
220 void kvm_recalculate_apic_map(struct kvm *kvm)
221 {
222         struct kvm_apic_map *new, *old = NULL;
223         struct kvm_vcpu *vcpu;
224         unsigned long i;
225         u32 max_id = 255; /* enough space for any xAPIC ID */
226
227         /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map.  */
228         if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
229                 return;
230
231         WARN_ONCE(!irqchip_in_kernel(kvm),
232                   "Dirty APIC map without an in-kernel local APIC");
233
234         mutex_lock(&kvm->arch.apic_map_lock);
235         /*
236          * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
237          * (if clean) or the APIC registers (if dirty).
238          */
239         if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
240                                    DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
241                 /* Someone else has updated the map. */
242                 mutex_unlock(&kvm->arch.apic_map_lock);
243                 return;
244         }
245
246         kvm_for_each_vcpu(i, vcpu, kvm)
247                 if (kvm_apic_present(vcpu))
248                         max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
249
250         new = kvzalloc(sizeof(struct kvm_apic_map) +
251                            sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
252                            GFP_KERNEL_ACCOUNT);
253
254         if (!new)
255                 goto out;
256
257         new->max_apic_id = max_id;
258
259         kvm_for_each_vcpu(i, vcpu, kvm) {
260                 struct kvm_lapic *apic = vcpu->arch.apic;
261                 struct kvm_lapic **cluster;
262                 u16 mask;
263                 u32 ldr;
264                 u8 xapic_id;
265                 u32 x2apic_id;
266
267                 if (!kvm_apic_present(vcpu))
268                         continue;
269
270                 xapic_id = kvm_xapic_id(apic);
271                 x2apic_id = kvm_x2apic_id(apic);
272
273                 /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
274                 if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
275                                 x2apic_id <= new->max_apic_id)
276                         new->phys_map[x2apic_id] = apic;
277                 /*
278                  * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around,
279                  * prevent them from masking VCPUs with APIC ID <= 0xff.
280                  */
281                 if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
282                         new->phys_map[xapic_id] = apic;
283
284                 if (!kvm_apic_sw_enabled(apic))
285                         continue;
286
287                 ldr = kvm_lapic_get_reg(apic, APIC_LDR);
288
289                 if (apic_x2apic_mode(apic)) {
290                         new->mode |= KVM_APIC_MODE_X2APIC;
291                 } else if (ldr) {
292                         ldr = GET_APIC_LOGICAL_ID(ldr);
293                         if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
294                                 new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
295                         else
296                                 new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
297                 }
298
299                 if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
300                         continue;
301
302                 if (mask)
303                         cluster[ffs(mask) - 1] = apic;
304         }
305 out:
306         old = rcu_dereference_protected(kvm->arch.apic_map,
307                         lockdep_is_held(&kvm->arch.apic_map_lock));
308         rcu_assign_pointer(kvm->arch.apic_map, new);
309         /*
310          * Write kvm->arch.apic_map before clearing apic->apic_map_dirty.
311          * If another update has come in, leave it DIRTY.
312          */
313         atomic_cmpxchg_release(&kvm->arch.apic_map_dirty,
314                                UPDATE_IN_PROGRESS, CLEAN);
315         mutex_unlock(&kvm->arch.apic_map_lock);
316
317         if (old)
318                 call_rcu(&old->rcu, kvm_apic_map_free);
319
320         kvm_make_scan_ioapic_request(kvm);
321 }
322
323 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
324 {
325         bool enabled = val & APIC_SPIV_APIC_ENABLED;
326
327         kvm_lapic_set_reg(apic, APIC_SPIV, val);
328
329         if (enabled != apic->sw_enabled) {
330                 apic->sw_enabled = enabled;
331                 if (enabled)
332                         static_branch_slow_dec_deferred(&apic_sw_disabled);
333                 else
334                         static_branch_inc(&apic_sw_disabled.key);
335
336                 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
337         }
338
339         /* Check if there are APF page ready requests pending */
340         if (enabled)
341                 kvm_make_request(KVM_REQ_APF_READY, apic->vcpu);
342 }
343
344 static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
345 {
346         kvm_lapic_set_reg(apic, APIC_ID, id << 24);
347         atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
348 }
349
350 static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
351 {
352         kvm_lapic_set_reg(apic, APIC_LDR, id);
353         atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
354 }
355
356 static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val)
357 {
358         kvm_lapic_set_reg(apic, APIC_DFR, val);
359         atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
360 }
361
362 static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
363 {
364         return ((id >> 4) << 16) | (1 << (id & 0xf));
365 }
366
367 static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
368 {
369         u32 ldr = kvm_apic_calc_x2apic_ldr(id);
370
371         WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
372
373         kvm_lapic_set_reg(apic, APIC_ID, id);
374         kvm_lapic_set_reg(apic, APIC_LDR, ldr);
375         atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
376 }
377
378 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
379 {
380         return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
381 }
382
383 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
384 {
385         return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
386 }
387
388 static inline int apic_lvtt_period(struct kvm_lapic *apic)
389 {
390         return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
391 }
392
393 static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
394 {
395         return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
396 }
397
398 static inline int apic_lvt_nmi_mode(u32 lvt_val)
399 {
400         return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
401 }
402
403 static inline bool kvm_lapic_lvt_supported(struct kvm_lapic *apic, int lvt_index)
404 {
405         return apic->nr_lvt_entries > lvt_index;
406 }
407
408 static inline int kvm_apic_calc_nr_lvt_entries(struct kvm_vcpu *vcpu)
409 {
410         return KVM_APIC_MAX_NR_LVT_ENTRIES - !(vcpu->arch.mcg_cap & MCG_CMCI_P);
411 }
412
413 void kvm_apic_set_version(struct kvm_vcpu *vcpu)
414 {
415         struct kvm_lapic *apic = vcpu->arch.apic;
416         u32 v = 0;
417
418         if (!lapic_in_kernel(vcpu))
419                 return;
420
421         v = APIC_VERSION | ((apic->nr_lvt_entries - 1) << 16);
422
423         /*
424          * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation)
425          * which doesn't have EOI register; Some buggy OSes (e.g. Windows with
426          * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC
427          * version first and level-triggered interrupts never get EOIed in
428          * IOAPIC.
429          */
430         if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) &&
431             !ioapic_in_kernel(vcpu->kvm))
432                 v |= APIC_LVR_DIRECTED_EOI;
433         kvm_lapic_set_reg(apic, APIC_LVR, v);
434 }
435
436 void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu)
437 {
438         int nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu);
439         struct kvm_lapic *apic = vcpu->arch.apic;
440         int i;
441
442         if (!lapic_in_kernel(vcpu) || nr_lvt_entries == apic->nr_lvt_entries)
443                 return;
444
445         /* Initialize/mask any "new" LVT entries. */
446         for (i = apic->nr_lvt_entries; i < nr_lvt_entries; i++)
447                 kvm_lapic_set_reg(apic, APIC_LVTx(i), APIC_LVT_MASKED);
448
449         apic->nr_lvt_entries = nr_lvt_entries;
450
451         /* The number of LVT entries is reflected in the version register. */
452         kvm_apic_set_version(vcpu);
453 }
454
455 static const unsigned int apic_lvt_mask[KVM_APIC_MAX_NR_LVT_ENTRIES] = {
456         [LVT_TIMER] = LVT_MASK,      /* timer mode mask added at runtime */
457         [LVT_THERMAL_MONITOR] = LVT_MASK | APIC_MODE_MASK,
458         [LVT_PERFORMANCE_COUNTER] = LVT_MASK | APIC_MODE_MASK,
459         [LVT_LINT0] = LINT_MASK,
460         [LVT_LINT1] = LINT_MASK,
461         [LVT_ERROR] = LVT_MASK,
462         [LVT_CMCI] = LVT_MASK | APIC_MODE_MASK
463 };
464
465 static int find_highest_vector(void *bitmap)
466 {
467         int vec;
468         u32 *reg;
469
470         for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
471              vec >= 0; vec -= APIC_VECTORS_PER_REG) {
472                 reg = bitmap + REG_POS(vec);
473                 if (*reg)
474                         return __fls(*reg) + vec;
475         }
476
477         return -1;
478 }
479
480 static u8 count_vectors(void *bitmap)
481 {
482         int vec;
483         u32 *reg;
484         u8 count = 0;
485
486         for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
487                 reg = bitmap + REG_POS(vec);
488                 count += hweight32(*reg);
489         }
490
491         return count;
492 }
493
494 bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
495 {
496         u32 i, vec;
497         u32 pir_val, irr_val, prev_irr_val;
498         int max_updated_irr;
499
500         max_updated_irr = -1;
501         *max_irr = -1;
502
503         for (i = vec = 0; i <= 7; i++, vec += 32) {
504                 pir_val = READ_ONCE(pir[i]);
505                 irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
506                 if (pir_val) {
507                         prev_irr_val = irr_val;
508                         irr_val |= xchg(&pir[i], 0);
509                         *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
510                         if (prev_irr_val != irr_val) {
511                                 max_updated_irr =
512                                         __fls(irr_val ^ prev_irr_val) + vec;
513                         }
514                 }
515                 if (irr_val)
516                         *max_irr = __fls(irr_val) + vec;
517         }
518
519         return ((max_updated_irr != -1) &&
520                 (max_updated_irr == *max_irr));
521 }
522 EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
523
524 bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
525 {
526         struct kvm_lapic *apic = vcpu->arch.apic;
527
528         return __kvm_apic_update_irr(pir, apic->regs, max_irr);
529 }
530 EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
531
532 static inline int apic_search_irr(struct kvm_lapic *apic)
533 {
534         return find_highest_vector(apic->regs + APIC_IRR);
535 }
536
537 static inline int apic_find_highest_irr(struct kvm_lapic *apic)
538 {
539         int result;
540
541         /*
542          * Note that irr_pending is just a hint. It will be always
543          * true with virtual interrupt delivery enabled.
544          */
545         if (!apic->irr_pending)
546                 return -1;
547
548         result = apic_search_irr(apic);
549         ASSERT(result == -1 || result >= 16);
550
551         return result;
552 }
553
554 static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
555 {
556         if (unlikely(apic->apicv_active)) {
557                 /* need to update RVI */
558                 kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
559                 static_call_cond(kvm_x86_hwapic_irr_update)(apic->vcpu,
560                                                             apic_find_highest_irr(apic));
561         } else {
562                 apic->irr_pending = false;
563                 kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
564                 if (apic_search_irr(apic) != -1)
565                         apic->irr_pending = true;
566         }
567 }
568
569 void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec)
570 {
571         apic_clear_irr(vec, vcpu->arch.apic);
572 }
573 EXPORT_SYMBOL_GPL(kvm_apic_clear_irr);
574
575 static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
576 {
577         if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
578                 return;
579
580         /*
581          * With APIC virtualization enabled, all caching is disabled
582          * because the processor can modify ISR under the hood.  Instead
583          * just set SVI.
584          */
585         if (unlikely(apic->apicv_active))
586                 static_call_cond(kvm_x86_hwapic_isr_update)(vec);
587         else {
588                 ++apic->isr_count;
589                 BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
590                 /*
591                  * ISR (in service register) bit is set when injecting an interrupt.
592                  * The highest vector is injected. Thus the latest bit set matches
593                  * the highest bit in ISR.
594                  */
595                 apic->highest_isr_cache = vec;
596         }
597 }
598
599 static inline int apic_find_highest_isr(struct kvm_lapic *apic)
600 {
601         int result;
602
603         /*
604          * Note that isr_count is always 1, and highest_isr_cache
605          * is always -1, with APIC virtualization enabled.
606          */
607         if (!apic->isr_count)
608                 return -1;
609         if (likely(apic->highest_isr_cache != -1))
610                 return apic->highest_isr_cache;
611
612         result = find_highest_vector(apic->regs + APIC_ISR);
613         ASSERT(result == -1 || result >= 16);
614
615         return result;
616 }
617
618 static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
619 {
620         if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
621                 return;
622
623         /*
624          * We do get here for APIC virtualization enabled if the guest
625          * uses the Hyper-V APIC enlightenment.  In this case we may need
626          * to trigger a new interrupt delivery by writing the SVI field;
627          * on the other hand isr_count and highest_isr_cache are unused
628          * and must be left alone.
629          */
630         if (unlikely(apic->apicv_active))
631                 static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic));
632         else {
633                 --apic->isr_count;
634                 BUG_ON(apic->isr_count < 0);
635                 apic->highest_isr_cache = -1;
636         }
637 }
638
639 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
640 {
641         /* This may race with setting of irr in __apic_accept_irq() and
642          * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
643          * will cause vmexit immediately and the value will be recalculated
644          * on the next vmentry.
645          */
646         return apic_find_highest_irr(vcpu->arch.apic);
647 }
648 EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
649
650 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
651                              int vector, int level, int trig_mode,
652                              struct dest_map *dest_map);
653
654 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
655                      struct dest_map *dest_map)
656 {
657         struct kvm_lapic *apic = vcpu->arch.apic;
658
659         return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
660                         irq->level, irq->trig_mode, dest_map);
661 }
662
663 static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
664                          struct kvm_lapic_irq *irq, u32 min)
665 {
666         int i, count = 0;
667         struct kvm_vcpu *vcpu;
668
669         if (min > map->max_apic_id)
670                 return 0;
671
672         for_each_set_bit(i, ipi_bitmap,
673                 min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
674                 if (map->phys_map[min + i]) {
675                         vcpu = map->phys_map[min + i]->vcpu;
676                         count += kvm_apic_set_irq(vcpu, irq, NULL);
677                 }
678         }
679
680         return count;
681 }
682
683 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
684                     unsigned long ipi_bitmap_high, u32 min,
685                     unsigned long icr, int op_64_bit)
686 {
687         struct kvm_apic_map *map;
688         struct kvm_lapic_irq irq = {0};
689         int cluster_size = op_64_bit ? 64 : 32;
690         int count;
691
692         if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
693                 return -KVM_EINVAL;
694
695         irq.vector = icr & APIC_VECTOR_MASK;
696         irq.delivery_mode = icr & APIC_MODE_MASK;
697         irq.level = (icr & APIC_INT_ASSERT) != 0;
698         irq.trig_mode = icr & APIC_INT_LEVELTRIG;
699
700         rcu_read_lock();
701         map = rcu_dereference(kvm->arch.apic_map);
702
703         count = -EOPNOTSUPP;
704         if (likely(map)) {
705                 count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
706                 min += cluster_size;
707                 count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
708         }
709
710         rcu_read_unlock();
711         return count;
712 }
713
714 static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
715 {
716
717         return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
718                                       sizeof(val));
719 }
720
721 static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
722 {
723
724         return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
725                                       sizeof(*val));
726 }
727
728 static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
729 {
730         return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
731 }
732
733 static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
734 {
735         if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0)
736                 return;
737
738         __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
739 }
740
741 static bool pv_eoi_test_and_clr_pending(struct kvm_vcpu *vcpu)
742 {
743         u8 val;
744
745         if (pv_eoi_get_user(vcpu, &val) < 0)
746                 return false;
747
748         val &= KVM_PV_EOI_ENABLED;
749
750         if (val && pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0)
751                 return false;
752
753         /*
754          * Clear pending bit in any case: it will be set again on vmentry.
755          * While this might not be ideal from performance point of view,
756          * this makes sure pv eoi is only enabled when we know it's safe.
757          */
758         __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
759
760         return val;
761 }
762
763 static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
764 {
765         int highest_irr;
766         if (kvm_x86_ops.sync_pir_to_irr)
767                 highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu);
768         else
769                 highest_irr = apic_find_highest_irr(apic);
770         if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
771                 return -1;
772         return highest_irr;
773 }
774
775 static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
776 {
777         u32 tpr, isrv, ppr, old_ppr;
778         int isr;
779
780         old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
781         tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
782         isr = apic_find_highest_isr(apic);
783         isrv = (isr != -1) ? isr : 0;
784
785         if ((tpr & 0xf0) >= (isrv & 0xf0))
786                 ppr = tpr & 0xff;
787         else
788                 ppr = isrv & 0xf0;
789
790         *new_ppr = ppr;
791         if (old_ppr != ppr)
792                 kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
793
794         return ppr < old_ppr;
795 }
796
797 static void apic_update_ppr(struct kvm_lapic *apic)
798 {
799         u32 ppr;
800
801         if (__apic_update_ppr(apic, &ppr) &&
802             apic_has_interrupt_for_ppr(apic, ppr) != -1)
803                 kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
804 }
805
806 void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
807 {
808         apic_update_ppr(vcpu->arch.apic);
809 }
810 EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
811
812 static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
813 {
814         kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
815         apic_update_ppr(apic);
816 }
817
818 static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
819 {
820         return mda == (apic_x2apic_mode(apic) ?
821                         X2APIC_BROADCAST : APIC_BROADCAST);
822 }
823
824 static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
825 {
826         if (kvm_apic_broadcast(apic, mda))
827                 return true;
828
829         /*
830          * Hotplug hack: Accept interrupts for vCPUs in xAPIC mode as if they
831          * were in x2APIC mode if the target APIC ID can't be encoded as an
832          * xAPIC ID.  This allows unique addressing of hotplugged vCPUs (which
833          * start in xAPIC mode) with an APIC ID that is unaddressable in xAPIC
834          * mode.  Match the x2APIC ID if and only if the target APIC ID can't
835          * be encoded in xAPIC to avoid spurious matches against a vCPU that
836          * changed its (addressable) xAPIC ID (which is writable).
837          */
838         if (apic_x2apic_mode(apic) || mda > 0xff)
839                 return mda == kvm_x2apic_id(apic);
840
841         return mda == kvm_xapic_id(apic);
842 }
843
844 static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
845 {
846         u32 logical_id;
847
848         if (kvm_apic_broadcast(apic, mda))
849                 return true;
850
851         logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
852
853         if (apic_x2apic_mode(apic))
854                 return ((logical_id >> 16) == (mda >> 16))
855                        && (logical_id & mda & 0xffff) != 0;
856
857         logical_id = GET_APIC_LOGICAL_ID(logical_id);
858
859         switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
860         case APIC_DFR_FLAT:
861                 return (logical_id & mda) != 0;
862         case APIC_DFR_CLUSTER:
863                 return ((logical_id >> 4) == (mda >> 4))
864                        && (logical_id & mda & 0xf) != 0;
865         default:
866                 return false;
867         }
868 }
869
870 /* The KVM local APIC implementation has two quirks:
871  *
872  *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
873  *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
874  *    KVM doesn't do that aliasing.
875  *
876  *  - in-kernel IOAPIC messages have to be delivered directly to
877  *    x2APIC, because the kernel does not support interrupt remapping.
878  *    In order to support broadcast without interrupt remapping, x2APIC
879  *    rewrites the destination of non-IPI messages from APIC_BROADCAST
880  *    to X2APIC_BROADCAST.
881  *
882  * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
883  * important when userspace wants to use x2APIC-format MSIs, because
884  * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
885  */
886 static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
887                 struct kvm_lapic *source, struct kvm_lapic *target)
888 {
889         bool ipi = source != NULL;
890
891         if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
892             !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
893                 return X2APIC_BROADCAST;
894
895         return dest_id;
896 }
897
898 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
899                            int shorthand, unsigned int dest, int dest_mode)
900 {
901         struct kvm_lapic *target = vcpu->arch.apic;
902         u32 mda = kvm_apic_mda(vcpu, dest, source, target);
903
904         ASSERT(target);
905         switch (shorthand) {
906         case APIC_DEST_NOSHORT:
907                 if (dest_mode == APIC_DEST_PHYSICAL)
908                         return kvm_apic_match_physical_addr(target, mda);
909                 else
910                         return kvm_apic_match_logical_addr(target, mda);
911         case APIC_DEST_SELF:
912                 return target == source;
913         case APIC_DEST_ALLINC:
914                 return true;
915         case APIC_DEST_ALLBUT:
916                 return target != source;
917         default:
918                 return false;
919         }
920 }
921 EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
922
923 int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
924                        const unsigned long *bitmap, u32 bitmap_size)
925 {
926         u32 mod;
927         int i, idx = -1;
928
929         mod = vector % dest_vcpus;
930
931         for (i = 0; i <= mod; i++) {
932                 idx = find_next_bit(bitmap, bitmap_size, idx + 1);
933                 BUG_ON(idx == bitmap_size);
934         }
935
936         return idx;
937 }
938
939 static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
940 {
941         if (!kvm->arch.disabled_lapic_found) {
942                 kvm->arch.disabled_lapic_found = true;
943                 printk(KERN_INFO
944                        "Disabled LAPIC found during irq injection\n");
945         }
946 }
947
948 static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
949                 struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
950 {
951         if (kvm->arch.x2apic_broadcast_quirk_disabled) {
952                 if ((irq->dest_id == APIC_BROADCAST &&
953                                 map->mode != KVM_APIC_MODE_X2APIC))
954                         return true;
955                 if (irq->dest_id == X2APIC_BROADCAST)
956                         return true;
957         } else {
958                 bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
959                 if (irq->dest_id == (x2apic_ipi ?
960                                      X2APIC_BROADCAST : APIC_BROADCAST))
961                         return true;
962         }
963
964         return false;
965 }
966
967 /* Return true if the interrupt can be handled by using *bitmap as index mask
968  * for valid destinations in *dst array.
969  * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
970  * Note: we may have zero kvm_lapic destinations when we return true, which
971  * means that the interrupt should be dropped.  In this case, *bitmap would be
972  * zero and *dst undefined.
973  */
974 static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
975                 struct kvm_lapic **src, struct kvm_lapic_irq *irq,
976                 struct kvm_apic_map *map, struct kvm_lapic ***dst,
977                 unsigned long *bitmap)
978 {
979         int i, lowest;
980
981         if (irq->shorthand == APIC_DEST_SELF && src) {
982                 *dst = src;
983                 *bitmap = 1;
984                 return true;
985         } else if (irq->shorthand)
986                 return false;
987
988         if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
989                 return false;
990
991         if (irq->dest_mode == APIC_DEST_PHYSICAL) {
992                 if (irq->dest_id > map->max_apic_id) {
993                         *bitmap = 0;
994                 } else {
995                         u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
996                         *dst = &map->phys_map[dest_id];
997                         *bitmap = 1;
998                 }
999                 return true;
1000         }
1001
1002         *bitmap = 0;
1003         if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
1004                                 (u16 *)bitmap))
1005                 return false;
1006
1007         if (!kvm_lowest_prio_delivery(irq))
1008                 return true;
1009
1010         if (!kvm_vector_hashing_enabled()) {
1011                 lowest = -1;
1012                 for_each_set_bit(i, bitmap, 16) {
1013                         if (!(*dst)[i])
1014                                 continue;
1015                         if (lowest < 0)
1016                                 lowest = i;
1017                         else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
1018                                                 (*dst)[lowest]->vcpu) < 0)
1019                                 lowest = i;
1020                 }
1021         } else {
1022                 if (!*bitmap)
1023                         return true;
1024
1025                 lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
1026                                 bitmap, 16);
1027
1028                 if (!(*dst)[lowest]) {
1029                         kvm_apic_disabled_lapic_found(kvm);
1030                         *bitmap = 0;
1031                         return true;
1032                 }
1033         }
1034
1035         *bitmap = (lowest >= 0) ? 1 << lowest : 0;
1036
1037         return true;
1038 }
1039
1040 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
1041                 struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
1042 {
1043         struct kvm_apic_map *map;
1044         unsigned long bitmap;
1045         struct kvm_lapic **dst = NULL;
1046         int i;
1047         bool ret;
1048
1049         *r = -1;
1050
1051         if (irq->shorthand == APIC_DEST_SELF) {
1052                 if (KVM_BUG_ON(!src, kvm)) {
1053                         *r = 0;
1054                         return true;
1055                 }
1056                 *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
1057                 return true;
1058         }
1059
1060         rcu_read_lock();
1061         map = rcu_dereference(kvm->arch.apic_map);
1062
1063         ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
1064         if (ret) {
1065                 *r = 0;
1066                 for_each_set_bit(i, &bitmap, 16) {
1067                         if (!dst[i])
1068                                 continue;
1069                         *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
1070                 }
1071         }
1072
1073         rcu_read_unlock();
1074         return ret;
1075 }
1076
1077 /*
1078  * This routine tries to handle interrupts in posted mode, here is how
1079  * it deals with different cases:
1080  * - For single-destination interrupts, handle it in posted mode
1081  * - Else if vector hashing is enabled and it is a lowest-priority
1082  *   interrupt, handle it in posted mode and use the following mechanism
1083  *   to find the destination vCPU.
1084  *      1. For lowest-priority interrupts, store all the possible
1085  *         destination vCPUs in an array.
1086  *      2. Use "guest vector % max number of destination vCPUs" to find
1087  *         the right destination vCPU in the array for the lowest-priority
1088  *         interrupt.
1089  * - Otherwise, use remapped mode to inject the interrupt.
1090  */
1091 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
1092                         struct kvm_vcpu **dest_vcpu)
1093 {
1094         struct kvm_apic_map *map;
1095         unsigned long bitmap;
1096         struct kvm_lapic **dst = NULL;
1097         bool ret = false;
1098
1099         if (irq->shorthand)
1100                 return false;
1101
1102         rcu_read_lock();
1103         map = rcu_dereference(kvm->arch.apic_map);
1104
1105         if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
1106                         hweight16(bitmap) == 1) {
1107                 unsigned long i = find_first_bit(&bitmap, 16);
1108
1109                 if (dst[i]) {
1110                         *dest_vcpu = dst[i]->vcpu;
1111                         ret = true;
1112                 }
1113         }
1114
1115         rcu_read_unlock();
1116         return ret;
1117 }
1118
1119 /*
1120  * Add a pending IRQ into lapic.
1121  * Return 1 if successfully added and 0 if discarded.
1122  */
1123 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
1124                              int vector, int level, int trig_mode,
1125                              struct dest_map *dest_map)
1126 {
1127         int result = 0;
1128         struct kvm_vcpu *vcpu = apic->vcpu;
1129
1130         trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
1131                                   trig_mode, vector);
1132         switch (delivery_mode) {
1133         case APIC_DM_LOWEST:
1134                 vcpu->arch.apic_arb_prio++;
1135                 fallthrough;
1136         case APIC_DM_FIXED:
1137                 if (unlikely(trig_mode && !level))
1138                         break;
1139
1140                 /* FIXME add logic for vcpu on reset */
1141                 if (unlikely(!apic_enabled(apic)))
1142                         break;
1143
1144                 result = 1;
1145
1146                 if (dest_map) {
1147                         __set_bit(vcpu->vcpu_id, dest_map->map);
1148                         dest_map->vectors[vcpu->vcpu_id] = vector;
1149                 }
1150
1151                 if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
1152                         if (trig_mode)
1153                                 kvm_lapic_set_vector(vector,
1154                                                      apic->regs + APIC_TMR);
1155                         else
1156                                 kvm_lapic_clear_vector(vector,
1157                                                        apic->regs + APIC_TMR);
1158                 }
1159
1160                 static_call(kvm_x86_deliver_interrupt)(apic, delivery_mode,
1161                                                        trig_mode, vector);
1162                 break;
1163
1164         case APIC_DM_REMRD:
1165                 result = 1;
1166                 vcpu->arch.pv.pv_unhalted = 1;
1167                 kvm_make_request(KVM_REQ_EVENT, vcpu);
1168                 kvm_vcpu_kick(vcpu);
1169                 break;
1170
1171         case APIC_DM_SMI:
1172                 result = 1;
1173                 kvm_make_request(KVM_REQ_SMI, vcpu);
1174                 kvm_vcpu_kick(vcpu);
1175                 break;
1176
1177         case APIC_DM_NMI:
1178                 result = 1;
1179                 kvm_inject_nmi(vcpu);
1180                 kvm_vcpu_kick(vcpu);
1181                 break;
1182
1183         case APIC_DM_INIT:
1184                 if (!trig_mode || level) {
1185                         result = 1;
1186                         /* assumes that there are only KVM_APIC_INIT/SIPI */
1187                         apic->pending_events = (1UL << KVM_APIC_INIT);
1188                         kvm_make_request(KVM_REQ_EVENT, vcpu);
1189                         kvm_vcpu_kick(vcpu);
1190                 }
1191                 break;
1192
1193         case APIC_DM_STARTUP:
1194                 result = 1;
1195                 apic->sipi_vector = vector;
1196                 /* make sure sipi_vector is visible for the receiver */
1197                 smp_wmb();
1198                 set_bit(KVM_APIC_SIPI, &apic->pending_events);
1199                 kvm_make_request(KVM_REQ_EVENT, vcpu);
1200                 kvm_vcpu_kick(vcpu);
1201                 break;
1202
1203         case APIC_DM_EXTINT:
1204                 /*
1205                  * Should only be called by kvm_apic_local_deliver() with LVT0,
1206                  * before NMI watchdog was enabled. Already handled by
1207                  * kvm_apic_accept_pic_intr().
1208                  */
1209                 break;
1210
1211         default:
1212                 printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
1213                        delivery_mode);
1214                 break;
1215         }
1216         return result;
1217 }
1218
1219 /*
1220  * This routine identifies the destination vcpus mask meant to receive the
1221  * IOAPIC interrupts. It either uses kvm_apic_map_get_dest_lapic() to find
1222  * out the destination vcpus array and set the bitmap or it traverses to
1223  * each available vcpu to identify the same.
1224  */
1225 void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
1226                               unsigned long *vcpu_bitmap)
1227 {
1228         struct kvm_lapic **dest_vcpu = NULL;
1229         struct kvm_lapic *src = NULL;
1230         struct kvm_apic_map *map;
1231         struct kvm_vcpu *vcpu;
1232         unsigned long bitmap, i;
1233         int vcpu_idx;
1234         bool ret;
1235
1236         rcu_read_lock();
1237         map = rcu_dereference(kvm->arch.apic_map);
1238
1239         ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
1240                                           &bitmap);
1241         if (ret) {
1242                 for_each_set_bit(i, &bitmap, 16) {
1243                         if (!dest_vcpu[i])
1244                                 continue;
1245                         vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
1246                         __set_bit(vcpu_idx, vcpu_bitmap);
1247                 }
1248         } else {
1249                 kvm_for_each_vcpu(i, vcpu, kvm) {
1250                         if (!kvm_apic_present(vcpu))
1251                                 continue;
1252                         if (!kvm_apic_match_dest(vcpu, NULL,
1253                                                  irq->shorthand,
1254                                                  irq->dest_id,
1255                                                  irq->dest_mode))
1256                                 continue;
1257                         __set_bit(i, vcpu_bitmap);
1258                 }
1259         }
1260         rcu_read_unlock();
1261 }
1262
1263 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
1264 {
1265         return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
1266 }
1267
1268 static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
1269 {
1270         return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
1271 }
1272
1273 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
1274 {
1275         int trigger_mode;
1276
1277         /* Eoi the ioapic only if the ioapic doesn't own the vector. */
1278         if (!kvm_ioapic_handles_vector(apic, vector))
1279                 return;
1280
1281         /* Request a KVM exit to inform the userspace IOAPIC. */
1282         if (irqchip_split(apic->vcpu->kvm)) {
1283                 apic->vcpu->arch.pending_ioapic_eoi = vector;
1284                 kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
1285                 return;
1286         }
1287
1288         if (apic_test_vector(vector, apic->regs + APIC_TMR))
1289                 trigger_mode = IOAPIC_LEVEL_TRIG;
1290         else
1291                 trigger_mode = IOAPIC_EDGE_TRIG;
1292
1293         kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
1294 }
1295
1296 static int apic_set_eoi(struct kvm_lapic *apic)
1297 {
1298         int vector = apic_find_highest_isr(apic);
1299
1300         trace_kvm_eoi(apic, vector);
1301
1302         /*
1303          * Not every write EOI will has corresponding ISR,
1304          * one example is when Kernel check timer on setup_IO_APIC
1305          */
1306         if (vector == -1)
1307                 return vector;
1308
1309         apic_clear_isr(vector, apic);
1310         apic_update_ppr(apic);
1311
1312         if (to_hv_vcpu(apic->vcpu) &&
1313             test_bit(vector, to_hv_synic(apic->vcpu)->vec_bitmap))
1314                 kvm_hv_synic_send_eoi(apic->vcpu, vector);
1315
1316         kvm_ioapic_send_eoi(apic, vector);
1317         kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1318         return vector;
1319 }
1320
1321 /*
1322  * this interface assumes a trap-like exit, which has already finished
1323  * desired side effect including vISR and vPPR update.
1324  */
1325 void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
1326 {
1327         struct kvm_lapic *apic = vcpu->arch.apic;
1328
1329         trace_kvm_eoi(apic, vector);
1330
1331         kvm_ioapic_send_eoi(apic, vector);
1332         kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1333 }
1334 EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
1335
1336 void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
1337 {
1338         struct kvm_lapic_irq irq;
1339
1340         /* KVM has no delay and should always clear the BUSY/PENDING flag. */
1341         WARN_ON_ONCE(icr_low & APIC_ICR_BUSY);
1342
1343         irq.vector = icr_low & APIC_VECTOR_MASK;
1344         irq.delivery_mode = icr_low & APIC_MODE_MASK;
1345         irq.dest_mode = icr_low & APIC_DEST_MASK;
1346         irq.level = (icr_low & APIC_INT_ASSERT) != 0;
1347         irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
1348         irq.shorthand = icr_low & APIC_SHORT_MASK;
1349         irq.msi_redir_hint = false;
1350         if (apic_x2apic_mode(apic))
1351                 irq.dest_id = icr_high;
1352         else
1353                 irq.dest_id = GET_XAPIC_DEST_FIELD(icr_high);
1354
1355         trace_kvm_apic_ipi(icr_low, irq.dest_id);
1356
1357         kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
1358 }
1359 EXPORT_SYMBOL_GPL(kvm_apic_send_ipi);
1360
1361 static u32 apic_get_tmcct(struct kvm_lapic *apic)
1362 {
1363         ktime_t remaining, now;
1364         s64 ns;
1365         u32 tmcct;
1366
1367         ASSERT(apic != NULL);
1368
1369         /* if initial count is 0, current count should also be 0 */
1370         if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
1371                 apic->lapic_timer.period == 0)
1372                 return 0;
1373
1374         now = ktime_get();
1375         remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1376         if (ktime_to_ns(remaining) < 0)
1377                 remaining = 0;
1378
1379         ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
1380         tmcct = div64_u64(ns,
1381                          (APIC_BUS_CYCLE_NS * apic->divide_count));
1382
1383         return tmcct;
1384 }
1385
1386 static void __report_tpr_access(struct kvm_lapic *apic, bool write)
1387 {
1388         struct kvm_vcpu *vcpu = apic->vcpu;
1389         struct kvm_run *run = vcpu->run;
1390
1391         kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
1392         run->tpr_access.rip = kvm_rip_read(vcpu);
1393         run->tpr_access.is_write = write;
1394 }
1395
1396 static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
1397 {
1398         if (apic->vcpu->arch.tpr_access_reporting)
1399                 __report_tpr_access(apic, write);
1400 }
1401
1402 static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
1403 {
1404         u32 val = 0;
1405
1406         if (offset >= LAPIC_MMIO_LENGTH)
1407                 return 0;
1408
1409         switch (offset) {
1410         case APIC_ARBPRI:
1411                 break;
1412
1413         case APIC_TMCCT:        /* Timer CCR */
1414                 if (apic_lvtt_tscdeadline(apic))
1415                         return 0;
1416
1417                 val = apic_get_tmcct(apic);
1418                 break;
1419         case APIC_PROCPRI:
1420                 apic_update_ppr(apic);
1421                 val = kvm_lapic_get_reg(apic, offset);
1422                 break;
1423         case APIC_TASKPRI:
1424                 report_tpr_access(apic, false);
1425                 fallthrough;
1426         default:
1427                 val = kvm_lapic_get_reg(apic, offset);
1428                 break;
1429         }
1430
1431         return val;
1432 }
1433
1434 static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
1435 {
1436         return container_of(dev, struct kvm_lapic, dev);
1437 }
1438
1439 #define APIC_REG_MASK(reg)      (1ull << ((reg) >> 4))
1440 #define APIC_REGS_MASK(first, count) \
1441         (APIC_REG_MASK(first) * ((1ull << (count)) - 1))
1442
1443 static int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
1444                               void *data)
1445 {
1446         unsigned char alignment = offset & 0xf;
1447         u32 result;
1448         /* this bitmask has a bit cleared for each reserved register */
1449         u64 valid_reg_mask =
1450                 APIC_REG_MASK(APIC_ID) |
1451                 APIC_REG_MASK(APIC_LVR) |
1452                 APIC_REG_MASK(APIC_TASKPRI) |
1453                 APIC_REG_MASK(APIC_PROCPRI) |
1454                 APIC_REG_MASK(APIC_LDR) |
1455                 APIC_REG_MASK(APIC_DFR) |
1456                 APIC_REG_MASK(APIC_SPIV) |
1457                 APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
1458                 APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
1459                 APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
1460                 APIC_REG_MASK(APIC_ESR) |
1461                 APIC_REG_MASK(APIC_ICR) |
1462                 APIC_REG_MASK(APIC_LVTT) |
1463                 APIC_REG_MASK(APIC_LVTTHMR) |
1464                 APIC_REG_MASK(APIC_LVTPC) |
1465                 APIC_REG_MASK(APIC_LVT0) |
1466                 APIC_REG_MASK(APIC_LVT1) |
1467                 APIC_REG_MASK(APIC_LVTERR) |
1468                 APIC_REG_MASK(APIC_TMICT) |
1469                 APIC_REG_MASK(APIC_TMCCT) |
1470                 APIC_REG_MASK(APIC_TDCR);
1471
1472         if (kvm_lapic_lvt_supported(apic, LVT_CMCI))
1473                 valid_reg_mask |= APIC_REG_MASK(APIC_LVTCMCI);
1474
1475         /*
1476          * ARBPRI and ICR2 are not valid in x2APIC mode.  WARN if KVM reads ICR
1477          * in x2APIC mode as it's an 8-byte register in x2APIC and needs to be
1478          * manually handled by the caller.
1479          */
1480         if (!apic_x2apic_mode(apic))
1481                 valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI) |
1482                                   APIC_REG_MASK(APIC_ICR2);
1483         else
1484                 WARN_ON_ONCE(offset == APIC_ICR);
1485
1486         if (alignment + len > 4)
1487                 return 1;
1488
1489         if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
1490                 return 1;
1491
1492         result = __apic_read(apic, offset & ~0xf);
1493
1494         trace_kvm_apic_read(offset, result);
1495
1496         switch (len) {
1497         case 1:
1498         case 2:
1499         case 4:
1500                 memcpy(data, (char *)&result + alignment, len);
1501                 break;
1502         default:
1503                 printk(KERN_ERR "Local APIC read with len = %x, "
1504                        "should be 1,2, or 4 instead\n", len);
1505                 break;
1506         }
1507         return 0;
1508 }
1509
1510 static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
1511 {
1512         return addr >= apic->base_address &&
1513                 addr < apic->base_address + LAPIC_MMIO_LENGTH;
1514 }
1515
1516 static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1517                            gpa_t address, int len, void *data)
1518 {
1519         struct kvm_lapic *apic = to_lapic(this);
1520         u32 offset = address - apic->base_address;
1521
1522         if (!apic_mmio_in_range(apic, address))
1523                 return -EOPNOTSUPP;
1524
1525         if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
1526                 if (!kvm_check_has_quirk(vcpu->kvm,
1527                                          KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
1528                         return -EOPNOTSUPP;
1529
1530                 memset(data, 0xff, len);
1531                 return 0;
1532         }
1533
1534         kvm_lapic_reg_read(apic, offset, len, data);
1535
1536         return 0;
1537 }
1538
1539 static void update_divide_count(struct kvm_lapic *apic)
1540 {
1541         u32 tmp1, tmp2, tdcr;
1542
1543         tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
1544         tmp1 = tdcr & 0xf;
1545         tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
1546         apic->divide_count = 0x1 << (tmp2 & 0x7);
1547 }
1548
1549 static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
1550 {
1551         /*
1552          * Do not allow the guest to program periodic timers with small
1553          * interval, since the hrtimers are not throttled by the host
1554          * scheduler.
1555          */
1556         if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1557                 s64 min_period = min_timer_period_us * 1000LL;
1558
1559                 if (apic->lapic_timer.period < min_period) {
1560                         pr_info_ratelimited(
1561                             "kvm: vcpu %i: requested %lld ns "
1562                             "lapic timer period limited to %lld ns\n",
1563                             apic->vcpu->vcpu_id,
1564                             apic->lapic_timer.period, min_period);
1565                         apic->lapic_timer.period = min_period;
1566                 }
1567         }
1568 }
1569
1570 static void cancel_hv_timer(struct kvm_lapic *apic);
1571
1572 static void cancel_apic_timer(struct kvm_lapic *apic)
1573 {
1574         hrtimer_cancel(&apic->lapic_timer.timer);
1575         preempt_disable();
1576         if (apic->lapic_timer.hv_timer_in_use)
1577                 cancel_hv_timer(apic);
1578         preempt_enable();
1579         atomic_set(&apic->lapic_timer.pending, 0);
1580 }
1581
1582 static void apic_update_lvtt(struct kvm_lapic *apic)
1583 {
1584         u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
1585                         apic->lapic_timer.timer_mode_mask;
1586
1587         if (apic->lapic_timer.timer_mode != timer_mode) {
1588                 if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
1589                                 APIC_LVT_TIMER_TSCDEADLINE)) {
1590                         cancel_apic_timer(apic);
1591                         kvm_lapic_set_reg(apic, APIC_TMICT, 0);
1592                         apic->lapic_timer.period = 0;
1593                         apic->lapic_timer.tscdeadline = 0;
1594                 }
1595                 apic->lapic_timer.timer_mode = timer_mode;
1596                 limit_periodic_timer_frequency(apic);
1597         }
1598 }
1599
1600 /*
1601  * On APICv, this test will cause a busy wait
1602  * during a higher-priority task.
1603  */
1604
1605 static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1606 {
1607         struct kvm_lapic *apic = vcpu->arch.apic;
1608         u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);
1609
1610         if (kvm_apic_hw_enabled(apic)) {
1611                 int vec = reg & APIC_VECTOR_MASK;
1612                 void *bitmap = apic->regs + APIC_ISR;
1613
1614                 if (apic->apicv_active)
1615                         bitmap = apic->regs + APIC_IRR;
1616
1617                 if (apic_test_vector(vec, bitmap))
1618                         return true;
1619         }
1620         return false;
1621 }
1622
1623 static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
1624 {
1625         u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
1626
1627         /*
1628          * If the guest TSC is running at a different ratio than the host, then
1629          * convert the delay to nanoseconds to achieve an accurate delay.  Note
1630          * that __delay() uses delay_tsc whenever the hardware has TSC, thus
1631          * always for VMX enabled hardware.
1632          */
1633         if (vcpu->arch.tsc_scaling_ratio == kvm_caps.default_tsc_scaling_ratio) {
1634                 __delay(min(guest_cycles,
1635                         nsec_to_cycles(vcpu, timer_advance_ns)));
1636         } else {
1637                 u64 delay_ns = guest_cycles * 1000000ULL;
1638                 do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
1639                 ndelay(min_t(u32, delay_ns, timer_advance_ns));
1640         }
1641 }
1642
1643 static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
1644                                               s64 advance_expire_delta)
1645 {
1646         struct kvm_lapic *apic = vcpu->arch.apic;
1647         u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
1648         u64 ns;
1649
1650         /* Do not adjust for tiny fluctuations or large random spikes. */
1651         if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
1652             abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
1653                 return;
1654
1655         /* too early */
1656         if (advance_expire_delta < 0) {
1657                 ns = -advance_expire_delta * 1000000ULL;
1658                 do_div(ns, vcpu->arch.virtual_tsc_khz);
1659                 timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1660         } else {
1661         /* too late */
1662                 ns = advance_expire_delta * 1000000ULL;
1663                 do_div(ns, vcpu->arch.virtual_tsc_khz);
1664                 timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1665         }
1666
1667         if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
1668                 timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
1669         apic->lapic_timer.timer_advance_ns = timer_advance_ns;
1670 }
1671
1672 static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1673 {
1674         struct kvm_lapic *apic = vcpu->arch.apic;
1675         u64 guest_tsc, tsc_deadline;
1676
1677         tsc_deadline = apic->lapic_timer.expired_tscdeadline;
1678         apic->lapic_timer.expired_tscdeadline = 0;
1679         guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1680         trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
1681
1682         if (lapic_timer_advance_dynamic) {
1683                 adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
1684                 /*
1685                  * If the timer fired early, reread the TSC to account for the
1686                  * overhead of the above adjustment to avoid waiting longer
1687                  * than is necessary.
1688                  */
1689                 if (guest_tsc < tsc_deadline)
1690                         guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1691         }
1692
1693         if (guest_tsc < tsc_deadline)
1694                 __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
1695 }
1696
1697 void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1698 {
1699         if (lapic_in_kernel(vcpu) &&
1700             vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1701             vcpu->arch.apic->lapic_timer.timer_advance_ns &&
1702             lapic_timer_int_injected(vcpu))
1703                 __kvm_wait_lapic_expire(vcpu);
1704 }
1705 EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
1706
1707 static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
1708 {
1709         struct kvm_timer *ktimer = &apic->lapic_timer;
1710
1711         kvm_apic_local_deliver(apic, APIC_LVTT);
1712         if (apic_lvtt_tscdeadline(apic)) {
1713                 ktimer->tscdeadline = 0;
1714         } else if (apic_lvtt_oneshot(apic)) {
1715                 ktimer->tscdeadline = 0;
1716                 ktimer->target_expiration = 0;
1717         }
1718 }
1719
1720 static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
1721 {
1722         struct kvm_vcpu *vcpu = apic->vcpu;
1723         struct kvm_timer *ktimer = &apic->lapic_timer;
1724
1725         if (atomic_read(&apic->lapic_timer.pending))
1726                 return;
1727
1728         if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
1729                 ktimer->expired_tscdeadline = ktimer->tscdeadline;
1730
1731         if (!from_timer_fn && apic->apicv_active) {
1732                 WARN_ON(kvm_get_running_vcpu() != vcpu);
1733                 kvm_apic_inject_pending_timer_irqs(apic);
1734                 return;
1735         }
1736
1737         if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
1738                 /*
1739                  * Ensure the guest's timer has truly expired before posting an
1740                  * interrupt.  Open code the relevant checks to avoid querying
1741                  * lapic_timer_int_injected(), which will be false since the
1742                  * interrupt isn't yet injected.  Waiting until after injecting
1743                  * is not an option since that won't help a posted interrupt.
1744                  */
1745                 if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1746                     vcpu->arch.apic->lapic_timer.timer_advance_ns)
1747                         __kvm_wait_lapic_expire(vcpu);
1748                 kvm_apic_inject_pending_timer_irqs(apic);
1749                 return;
1750         }
1751
1752         atomic_inc(&apic->lapic_timer.pending);
1753         kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
1754         if (from_timer_fn)
1755                 kvm_vcpu_kick(vcpu);
1756 }
1757
1758 static void start_sw_tscdeadline(struct kvm_lapic *apic)
1759 {
1760         struct kvm_timer *ktimer = &apic->lapic_timer;
1761         u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
1762         u64 ns = 0;
1763         ktime_t expire;
1764         struct kvm_vcpu *vcpu = apic->vcpu;
1765         unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
1766         unsigned long flags;
1767         ktime_t now;
1768
1769         if (unlikely(!tscdeadline || !this_tsc_khz))
1770                 return;
1771
1772         local_irq_save(flags);
1773
1774         now = ktime_get();
1775         guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1776
1777         ns = (tscdeadline - guest_tsc) * 1000000ULL;
1778         do_div(ns, this_tsc_khz);
1779
1780         if (likely(tscdeadline > guest_tsc) &&
1781             likely(ns > apic->lapic_timer.timer_advance_ns)) {
1782                 expire = ktime_add_ns(now, ns);
1783                 expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
1784                 hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
1785         } else
1786                 apic_timer_expired(apic, false);
1787
1788         local_irq_restore(flags);
1789 }
1790
1791 static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
1792 {
1793         return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count;
1794 }
1795
1796 static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
1797 {
1798         ktime_t now, remaining;
1799         u64 ns_remaining_old, ns_remaining_new;
1800
1801         apic->lapic_timer.period =
1802                         tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
1803         limit_periodic_timer_frequency(apic);
1804
1805         now = ktime_get();
1806         remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1807         if (ktime_to_ns(remaining) < 0)
1808                 remaining = 0;
1809
1810         ns_remaining_old = ktime_to_ns(remaining);
1811         ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
1812                                            apic->divide_count, old_divisor);
1813
1814         apic->lapic_timer.tscdeadline +=
1815                 nsec_to_cycles(apic->vcpu, ns_remaining_new) -
1816                 nsec_to_cycles(apic->vcpu, ns_remaining_old);
1817         apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
1818 }
1819
1820 static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
1821 {
1822         ktime_t now;
1823         u64 tscl = rdtsc();
1824         s64 deadline;
1825
1826         now = ktime_get();
1827         apic->lapic_timer.period =
1828                         tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
1829
1830         if (!apic->lapic_timer.period) {
1831                 apic->lapic_timer.tscdeadline = 0;
1832                 return false;
1833         }
1834
1835         limit_periodic_timer_frequency(apic);
1836         deadline = apic->lapic_timer.period;
1837
1838         if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
1839                 if (unlikely(count_reg != APIC_TMICT)) {
1840                         deadline = tmict_to_ns(apic,
1841                                      kvm_lapic_get_reg(apic, count_reg));
1842                         if (unlikely(deadline <= 0))
1843                                 deadline = apic->lapic_timer.period;
1844                         else if (unlikely(deadline > apic->lapic_timer.period)) {
1845                                 pr_info_ratelimited(
1846                                     "kvm: vcpu %i: requested lapic timer restore with "
1847                                     "starting count register %#x=%u (%lld ns) > initial count (%lld ns). "
1848                                     "Using initial count to start timer.\n",
1849                                     apic->vcpu->vcpu_id,
1850                                     count_reg,
1851                                     kvm_lapic_get_reg(apic, count_reg),
1852                                     deadline, apic->lapic_timer.period);
1853                                 kvm_lapic_set_reg(apic, count_reg, 0);
1854                                 deadline = apic->lapic_timer.period;
1855                         }
1856                 }
1857         }
1858
1859         apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1860                 nsec_to_cycles(apic->vcpu, deadline);
1861         apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline);
1862
1863         return true;
1864 }
1865
1866 static void advance_periodic_target_expiration(struct kvm_lapic *apic)
1867 {
1868         ktime_t now = ktime_get();
1869         u64 tscl = rdtsc();
1870         ktime_t delta;
1871
1872         /*
1873          * Synchronize both deadlines to the same time source or
1874          * differences in the periods (caused by differences in the
1875          * underlying clocks or numerical approximation errors) will
1876          * cause the two to drift apart over time as the errors
1877          * accumulate.
1878          */
1879         apic->lapic_timer.target_expiration =
1880                 ktime_add_ns(apic->lapic_timer.target_expiration,
1881                                 apic->lapic_timer.period);
1882         delta = ktime_sub(apic->lapic_timer.target_expiration, now);
1883         apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1884                 nsec_to_cycles(apic->vcpu, delta);
1885 }
1886
1887 static void start_sw_period(struct kvm_lapic *apic)
1888 {
1889         if (!apic->lapic_timer.period)
1890                 return;
1891
1892         if (ktime_after(ktime_get(),
1893                         apic->lapic_timer.target_expiration)) {
1894                 apic_timer_expired(apic, false);
1895
1896                 if (apic_lvtt_oneshot(apic))
1897                         return;
1898
1899                 advance_periodic_target_expiration(apic);
1900         }
1901
1902         hrtimer_start(&apic->lapic_timer.timer,
1903                 apic->lapic_timer.target_expiration,
1904                 HRTIMER_MODE_ABS_HARD);
1905 }
1906
1907 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
1908 {
1909         if (!lapic_in_kernel(vcpu))
1910                 return false;
1911
1912         return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
1913 }
1914 EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
1915
1916 static void cancel_hv_timer(struct kvm_lapic *apic)
1917 {
1918         WARN_ON(preemptible());
1919         WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1920         static_call(kvm_x86_cancel_hv_timer)(apic->vcpu);
1921         apic->lapic_timer.hv_timer_in_use = false;
1922 }
1923
1924 static bool start_hv_timer(struct kvm_lapic *apic)
1925 {
1926         struct kvm_timer *ktimer = &apic->lapic_timer;
1927         struct kvm_vcpu *vcpu = apic->vcpu;
1928         bool expired;
1929
1930         WARN_ON(preemptible());
1931         if (!kvm_can_use_hv_timer(vcpu))
1932                 return false;
1933
1934         if (!ktimer->tscdeadline)
1935                 return false;
1936
1937         if (static_call(kvm_x86_set_hv_timer)(vcpu, ktimer->tscdeadline, &expired))
1938                 return false;
1939
1940         ktimer->hv_timer_in_use = true;
1941         hrtimer_cancel(&ktimer->timer);
1942
1943         /*
1944          * To simplify handling the periodic timer, leave the hv timer running
1945          * even if the deadline timer has expired, i.e. rely on the resulting
1946          * VM-Exit to recompute the periodic timer's target expiration.
1947          */
1948         if (!apic_lvtt_period(apic)) {
1949                 /*
1950                  * Cancel the hv timer if the sw timer fired while the hv timer
1951                  * was being programmed, or if the hv timer itself expired.
1952                  */
1953                 if (atomic_read(&ktimer->pending)) {
1954                         cancel_hv_timer(apic);
1955                 } else if (expired) {
1956                         apic_timer_expired(apic, false);
1957                         cancel_hv_timer(apic);
1958                 }
1959         }
1960
1961         trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
1962
1963         return true;
1964 }
1965
1966 static void start_sw_timer(struct kvm_lapic *apic)
1967 {
1968         struct kvm_timer *ktimer = &apic->lapic_timer;
1969
1970         WARN_ON(preemptible());
1971         if (apic->lapic_timer.hv_timer_in_use)
1972                 cancel_hv_timer(apic);
1973         if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
1974                 return;
1975
1976         if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1977                 start_sw_period(apic);
1978         else if (apic_lvtt_tscdeadline(apic))
1979                 start_sw_tscdeadline(apic);
1980         trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
1981 }
1982
1983 static void restart_apic_timer(struct kvm_lapic *apic)
1984 {
1985         preempt_disable();
1986
1987         if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
1988                 goto out;
1989
1990         if (!start_hv_timer(apic))
1991                 start_sw_timer(apic);
1992 out:
1993         preempt_enable();
1994 }
1995
1996 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
1997 {
1998         struct kvm_lapic *apic = vcpu->arch.apic;
1999
2000         preempt_disable();
2001         /* If the preempt notifier has already run, it also called apic_timer_expired */
2002         if (!apic->lapic_timer.hv_timer_in_use)
2003                 goto out;
2004         WARN_ON(kvm_vcpu_is_blocking(vcpu));
2005         apic_timer_expired(apic, false);
2006         cancel_hv_timer(apic);
2007
2008         if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
2009                 advance_periodic_target_expiration(apic);
2010                 restart_apic_timer(apic);
2011         }
2012 out:
2013         preempt_enable();
2014 }
2015 EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
2016
2017 void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
2018 {
2019         restart_apic_timer(vcpu->arch.apic);
2020 }
2021
2022 void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
2023 {
2024         struct kvm_lapic *apic = vcpu->arch.apic;
2025
2026         preempt_disable();
2027         /* Possibly the TSC deadline timer is not enabled yet */
2028         if (apic->lapic_timer.hv_timer_in_use)
2029                 start_sw_timer(apic);
2030         preempt_enable();
2031 }
2032
2033 void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
2034 {
2035         struct kvm_lapic *apic = vcpu->arch.apic;
2036
2037         WARN_ON(!apic->lapic_timer.hv_timer_in_use);
2038         restart_apic_timer(apic);
2039 }
2040
2041 static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg)
2042 {
2043         atomic_set(&apic->lapic_timer.pending, 0);
2044
2045         if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
2046             && !set_target_expiration(apic, count_reg))
2047                 return;
2048
2049         restart_apic_timer(apic);
2050 }
2051
2052 static void start_apic_timer(struct kvm_lapic *apic)
2053 {
2054         __start_apic_timer(apic, APIC_TMICT);
2055 }
2056
2057 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
2058 {
2059         bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
2060
2061         if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
2062                 apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
2063                 if (lvt0_in_nmi_mode) {
2064                         atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
2065                 } else
2066                         atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
2067         }
2068 }
2069
2070 static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic)
2071 {
2072         struct kvm *kvm = apic->vcpu->kvm;
2073
2074         if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm))
2075                 return;
2076
2077         if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id)
2078                 return;
2079
2080         kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
2081 }
2082
2083 static int get_lvt_index(u32 reg)
2084 {
2085         if (reg == APIC_LVTCMCI)
2086                 return LVT_CMCI;
2087         if (reg < APIC_LVTT || reg > APIC_LVTERR)
2088                 return -1;
2089         return array_index_nospec(
2090                         (reg - APIC_LVTT) >> 4, KVM_APIC_MAX_NR_LVT_ENTRIES);
2091 }
2092
2093 static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
2094 {
2095         int ret = 0;
2096
2097         trace_kvm_apic_write(reg, val);
2098
2099         switch (reg) {
2100         case APIC_ID:           /* Local APIC ID */
2101                 if (!apic_x2apic_mode(apic)) {
2102                         kvm_apic_set_xapic_id(apic, val >> 24);
2103                         kvm_lapic_xapic_id_updated(apic);
2104                 } else {
2105                         ret = 1;
2106                 }
2107                 break;
2108
2109         case APIC_TASKPRI:
2110                 report_tpr_access(apic, true);
2111                 apic_set_tpr(apic, val & 0xff);
2112                 break;
2113
2114         case APIC_EOI:
2115                 apic_set_eoi(apic);
2116                 break;
2117
2118         case APIC_LDR:
2119                 if (!apic_x2apic_mode(apic))
2120                         kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
2121                 else
2122                         ret = 1;
2123                 break;
2124
2125         case APIC_DFR:
2126                 if (!apic_x2apic_mode(apic))
2127                         kvm_apic_set_dfr(apic, val | 0x0FFFFFFF);
2128                 else
2129                         ret = 1;
2130                 break;
2131
2132         case APIC_SPIV: {
2133                 u32 mask = 0x3ff;
2134                 if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
2135                         mask |= APIC_SPIV_DIRECTED_EOI;
2136                 apic_set_spiv(apic, val & mask);
2137                 if (!(val & APIC_SPIV_APIC_ENABLED)) {
2138                         int i;
2139
2140                         for (i = 0; i < apic->nr_lvt_entries; i++) {
2141                                 kvm_lapic_set_reg(apic, APIC_LVTx(i),
2142                                         kvm_lapic_get_reg(apic, APIC_LVTx(i)) | APIC_LVT_MASKED);
2143                         }
2144                         apic_update_lvtt(apic);
2145                         atomic_set(&apic->lapic_timer.pending, 0);
2146
2147                 }
2148                 break;
2149         }
2150         case APIC_ICR:
2151                 WARN_ON_ONCE(apic_x2apic_mode(apic));
2152
2153                 /* No delay here, so we always clear the pending bit */
2154                 val &= ~APIC_ICR_BUSY;
2155                 kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
2156                 kvm_lapic_set_reg(apic, APIC_ICR, val);
2157                 break;
2158         case APIC_ICR2:
2159                 if (apic_x2apic_mode(apic))
2160                         ret = 1;
2161                 else
2162                         kvm_lapic_set_reg(apic, APIC_ICR2, val & 0xff000000);
2163                 break;
2164
2165         case APIC_LVT0:
2166                 apic_manage_nmi_watchdog(apic, val);
2167                 fallthrough;
2168         case APIC_LVTTHMR:
2169         case APIC_LVTPC:
2170         case APIC_LVT1:
2171         case APIC_LVTERR:
2172         case APIC_LVTCMCI: {
2173                 u32 index = get_lvt_index(reg);
2174                 if (!kvm_lapic_lvt_supported(apic, index)) {
2175                         ret = 1;
2176                         break;
2177                 }
2178                 if (!kvm_apic_sw_enabled(apic))
2179                         val |= APIC_LVT_MASKED;
2180                 val &= apic_lvt_mask[index];
2181                 kvm_lapic_set_reg(apic, reg, val);
2182                 break;
2183         }
2184
2185         case APIC_LVTT:
2186                 if (!kvm_apic_sw_enabled(apic))
2187                         val |= APIC_LVT_MASKED;
2188                 val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
2189                 kvm_lapic_set_reg(apic, APIC_LVTT, val);
2190                 apic_update_lvtt(apic);
2191                 break;
2192
2193         case APIC_TMICT:
2194                 if (apic_lvtt_tscdeadline(apic))
2195                         break;
2196
2197                 cancel_apic_timer(apic);
2198                 kvm_lapic_set_reg(apic, APIC_TMICT, val);
2199                 start_apic_timer(apic);
2200                 break;
2201
2202         case APIC_TDCR: {
2203                 uint32_t old_divisor = apic->divide_count;
2204
2205                 kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb);
2206                 update_divide_count(apic);
2207                 if (apic->divide_count != old_divisor &&
2208                                 apic->lapic_timer.period) {
2209                         hrtimer_cancel(&apic->lapic_timer.timer);
2210                         update_target_expiration(apic, old_divisor);
2211                         restart_apic_timer(apic);
2212                 }
2213                 break;
2214         }
2215         case APIC_ESR:
2216                 if (apic_x2apic_mode(apic) && val != 0)
2217                         ret = 1;
2218                 break;
2219
2220         case APIC_SELF_IPI:
2221                 if (apic_x2apic_mode(apic))
2222                         kvm_apic_send_ipi(apic, APIC_DEST_SELF | (val & APIC_VECTOR_MASK), 0);
2223                 else
2224                         ret = 1;
2225                 break;
2226         default:
2227                 ret = 1;
2228                 break;
2229         }
2230
2231         /*
2232          * Recalculate APIC maps if necessary, e.g. if the software enable bit
2233          * was toggled, the APIC ID changed, etc...   The maps are marked dirty
2234          * on relevant changes, i.e. this is a nop for most writes.
2235          */
2236         kvm_recalculate_apic_map(apic->vcpu->kvm);
2237
2238         return ret;
2239 }
2240
2241 static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
2242                             gpa_t address, int len, const void *data)
2243 {
2244         struct kvm_lapic *apic = to_lapic(this);
2245         unsigned int offset = address - apic->base_address;
2246         u32 val;
2247
2248         if (!apic_mmio_in_range(apic, address))
2249                 return -EOPNOTSUPP;
2250
2251         if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
2252                 if (!kvm_check_has_quirk(vcpu->kvm,
2253                                          KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
2254                         return -EOPNOTSUPP;
2255
2256                 return 0;
2257         }
2258
2259         /*
2260          * APIC register must be aligned on 128-bits boundary.
2261          * 32/64/128 bits registers must be accessed thru 32 bits.
2262          * Refer SDM 8.4.1
2263          */
2264         if (len != 4 || (offset & 0xf))
2265                 return 0;
2266
2267         val = *(u32*)data;
2268
2269         kvm_lapic_reg_write(apic, offset & 0xff0, val);
2270
2271         return 0;
2272 }
2273
2274 void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
2275 {
2276         kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
2277 }
2278 EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
2279
2280 /* emulate APIC access in a trap manner */
2281 void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
2282 {
2283         struct kvm_lapic *apic = vcpu->arch.apic;
2284         u64 val;
2285
2286         if (apic_x2apic_mode(apic)) {
2287                 /*
2288                  * When guest APIC is in x2APIC mode and IPI virtualization
2289                  * is enabled, accessing APIC_ICR may cause trap-like VM-exit
2290                  * on Intel hardware. Other offsets are not possible.
2291                  */
2292                 if (WARN_ON_ONCE(offset != APIC_ICR))
2293                         return;
2294
2295                 kvm_lapic_msr_read(apic, offset, &val);
2296                 kvm_apic_send_ipi(apic, (u32)val, (u32)(val >> 32));
2297                 trace_kvm_apic_write(APIC_ICR, val);
2298         } else {
2299                 val = kvm_lapic_get_reg(apic, offset);
2300
2301                 /* TODO: optimize to just emulate side effect w/o one more write */
2302                 kvm_lapic_reg_write(apic, offset, (u32)val);
2303         }
2304 }
2305 EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
2306
2307 void kvm_free_lapic(struct kvm_vcpu *vcpu)
2308 {
2309         struct kvm_lapic *apic = vcpu->arch.apic;
2310
2311         if (!vcpu->arch.apic)
2312                 return;
2313
2314         hrtimer_cancel(&apic->lapic_timer.timer);
2315
2316         if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
2317                 static_branch_slow_dec_deferred(&apic_hw_disabled);
2318
2319         if (!apic->sw_enabled)
2320                 static_branch_slow_dec_deferred(&apic_sw_disabled);
2321
2322         if (apic->regs)
2323                 free_page((unsigned long)apic->regs);
2324
2325         kfree(apic);
2326 }
2327
2328 /*
2329  *----------------------------------------------------------------------
2330  * LAPIC interface
2331  *----------------------------------------------------------------------
2332  */
2333 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
2334 {
2335         struct kvm_lapic *apic = vcpu->arch.apic;
2336
2337         if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2338                 return 0;
2339
2340         return apic->lapic_timer.tscdeadline;
2341 }
2342
2343 void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
2344 {
2345         struct kvm_lapic *apic = vcpu->arch.apic;
2346
2347         if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2348                 return;
2349
2350         hrtimer_cancel(&apic->lapic_timer.timer);
2351         apic->lapic_timer.tscdeadline = data;
2352         start_apic_timer(apic);
2353 }
2354
2355 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
2356 {
2357         apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4);
2358 }
2359
2360 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
2361 {
2362         u64 tpr;
2363
2364         tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
2365
2366         return (tpr & 0xf0) >> 4;
2367 }
2368
2369 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
2370 {
2371         u64 old_value = vcpu->arch.apic_base;
2372         struct kvm_lapic *apic = vcpu->arch.apic;
2373
2374         vcpu->arch.apic_base = value;
2375
2376         if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
2377                 kvm_update_cpuid_runtime(vcpu);
2378
2379         if (!apic)
2380                 return;
2381
2382         /* update jump label if enable bit changes */
2383         if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
2384                 if (value & MSR_IA32_APICBASE_ENABLE) {
2385                         kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2386                         static_branch_slow_dec_deferred(&apic_hw_disabled);
2387                         /* Check if there are APF page ready requests pending */
2388                         kvm_make_request(KVM_REQ_APF_READY, vcpu);
2389                 } else {
2390                         static_branch_inc(&apic_hw_disabled.key);
2391                         atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
2392                 }
2393         }
2394
2395         if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
2396                 kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
2397
2398         if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) {
2399                 kvm_vcpu_update_apicv(vcpu);
2400                 static_call_cond(kvm_x86_set_virtual_apic_mode)(vcpu);
2401         }
2402
2403         apic->base_address = apic->vcpu->arch.apic_base &
2404                              MSR_IA32_APICBASE_BASE;
2405
2406         if ((value & MSR_IA32_APICBASE_ENABLE) &&
2407              apic->base_address != APIC_DEFAULT_PHYS_BASE) {
2408                 kvm_set_apicv_inhibit(apic->vcpu->kvm,
2409                                       APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
2410         }
2411 }
2412
2413 void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
2414 {
2415         struct kvm_lapic *apic = vcpu->arch.apic;
2416
2417         if (apic->apicv_active) {
2418                 /* irr_pending is always true when apicv is activated. */
2419                 apic->irr_pending = true;
2420                 apic->isr_count = 1;
2421         } else {
2422                 /*
2423                  * Don't clear irr_pending, searching the IRR can race with
2424                  * updates from the CPU as APICv is still active from hardware's
2425                  * perspective.  The flag will be cleared as appropriate when
2426                  * KVM injects the interrupt.
2427                  */
2428                 apic->isr_count = count_vectors(apic->regs + APIC_ISR);
2429         }
2430 }
2431 EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);
2432
2433 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
2434 {
2435         struct kvm_lapic *apic = vcpu->arch.apic;
2436         u64 msr_val;
2437         int i;
2438
2439         if (!init_event) {
2440                 msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
2441                 if (kvm_vcpu_is_reset_bsp(vcpu))
2442                         msr_val |= MSR_IA32_APICBASE_BSP;
2443                 kvm_lapic_set_base(vcpu, msr_val);
2444         }
2445
2446         if (!apic)
2447                 return;
2448
2449         /* Stop the timer in case it's a reset to an active apic */
2450         hrtimer_cancel(&apic->lapic_timer.timer);
2451
2452         /* The xAPIC ID is set at RESET even if the APIC was already enabled. */
2453         if (!init_event)
2454                 kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2455         kvm_apic_set_version(apic->vcpu);
2456
2457         for (i = 0; i < apic->nr_lvt_entries; i++)
2458                 kvm_lapic_set_reg(apic, APIC_LVTx(i), APIC_LVT_MASKED);
2459         apic_update_lvtt(apic);
2460         if (kvm_vcpu_is_reset_bsp(vcpu) &&
2461             kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
2462                 kvm_lapic_set_reg(apic, APIC_LVT0,
2463                              SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
2464         apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2465
2466         kvm_apic_set_dfr(apic, 0xffffffffU);
2467         apic_set_spiv(apic, 0xff);
2468         kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
2469         if (!apic_x2apic_mode(apic))
2470                 kvm_apic_set_ldr(apic, 0);
2471         kvm_lapic_set_reg(apic, APIC_ESR, 0);
2472         if (!apic_x2apic_mode(apic)) {
2473                 kvm_lapic_set_reg(apic, APIC_ICR, 0);
2474                 kvm_lapic_set_reg(apic, APIC_ICR2, 0);
2475         } else {
2476                 kvm_lapic_set_reg64(apic, APIC_ICR, 0);
2477         }
2478         kvm_lapic_set_reg(apic, APIC_TDCR, 0);
2479         kvm_lapic_set_reg(apic, APIC_TMICT, 0);
2480         for (i = 0; i < 8; i++) {
2481                 kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
2482                 kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
2483                 kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
2484         }
2485         kvm_apic_update_apicv(vcpu);
2486         apic->highest_isr_cache = -1;
2487         update_divide_count(apic);
2488         atomic_set(&apic->lapic_timer.pending, 0);
2489
2490         vcpu->arch.pv_eoi.msr_val = 0;
2491         apic_update_ppr(apic);
2492         if (apic->apicv_active) {
2493                 static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
2494                 static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, -1);
2495                 static_call_cond(kvm_x86_hwapic_isr_update)(-1);
2496         }
2497
2498         vcpu->arch.apic_arb_prio = 0;
2499         vcpu->arch.apic_attention = 0;
2500
2501         kvm_recalculate_apic_map(vcpu->kvm);
2502 }
2503
2504 /*
2505  *----------------------------------------------------------------------
2506  * timer interface
2507  *----------------------------------------------------------------------
2508  */
2509
2510 static bool lapic_is_periodic(struct kvm_lapic *apic)
2511 {
2512         return apic_lvtt_period(apic);
2513 }
2514
2515 int apic_has_pending_timer(struct kvm_vcpu *vcpu)
2516 {
2517         struct kvm_lapic *apic = vcpu->arch.apic;
2518
2519         if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
2520                 return atomic_read(&apic->lapic_timer.pending);
2521
2522         return 0;
2523 }
2524
2525 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
2526 {
2527         u32 reg = kvm_lapic_get_reg(apic, lvt_type);
2528         int vector, mode, trig_mode;
2529
2530         if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
2531                 vector = reg & APIC_VECTOR_MASK;
2532                 mode = reg & APIC_MODE_MASK;
2533                 trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
2534                 return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
2535                                         NULL);
2536         }
2537         return 0;
2538 }
2539
2540 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
2541 {
2542         struct kvm_lapic *apic = vcpu->arch.apic;
2543
2544         if (apic)
2545                 kvm_apic_local_deliver(apic, APIC_LVT0);
2546 }
2547
2548 static const struct kvm_io_device_ops apic_mmio_ops = {
2549         .read     = apic_mmio_read,
2550         .write    = apic_mmio_write,
2551 };
2552
2553 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
2554 {
2555         struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
2556         struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
2557
2558         apic_timer_expired(apic, true);
2559
2560         if (lapic_is_periodic(apic)) {
2561                 advance_periodic_target_expiration(apic);
2562                 hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
2563                 return HRTIMER_RESTART;
2564         } else
2565                 return HRTIMER_NORESTART;
2566 }
2567
2568 int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
2569 {
2570         struct kvm_lapic *apic;
2571
2572         ASSERT(vcpu != NULL);
2573
2574         apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
2575         if (!apic)
2576                 goto nomem;
2577
2578         vcpu->arch.apic = apic;
2579
2580         apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
2581         if (!apic->regs) {
2582                 printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
2583                        vcpu->vcpu_id);
2584                 goto nomem_free_apic;
2585         }
2586         apic->vcpu = vcpu;
2587
2588         apic->nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu);
2589
2590         hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
2591                      HRTIMER_MODE_ABS_HARD);
2592         apic->lapic_timer.timer.function = apic_timer_fn;
2593         if (timer_advance_ns == -1) {
2594                 apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
2595                 lapic_timer_advance_dynamic = true;
2596         } else {
2597                 apic->lapic_timer.timer_advance_ns = timer_advance_ns;
2598                 lapic_timer_advance_dynamic = false;
2599         }
2600
2601         /*
2602          * Stuff the APIC ENABLE bit in lieu of temporarily incrementing
2603          * apic_hw_disabled; the full RESET value is set by kvm_lapic_reset().
2604          */
2605         vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
2606         static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
2607         kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
2608
2609         return 0;
2610 nomem_free_apic:
2611         kfree(apic);
2612         vcpu->arch.apic = NULL;
2613 nomem:
2614         return -ENOMEM;
2615 }
2616
2617 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
2618 {
2619         struct kvm_lapic *apic = vcpu->arch.apic;
2620         u32 ppr;
2621
2622         if (!kvm_apic_present(vcpu))
2623                 return -1;
2624
2625         __apic_update_ppr(apic, &ppr);
2626         return apic_has_interrupt_for_ppr(apic, ppr);
2627 }
2628 EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt);
2629
2630 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
2631 {
2632         u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
2633
2634         if (!kvm_apic_hw_enabled(vcpu->arch.apic))
2635                 return 1;
2636         if ((lvt0 & APIC_LVT_MASKED) == 0 &&
2637             GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
2638                 return 1;
2639         return 0;
2640 }
2641
2642 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
2643 {
2644         struct kvm_lapic *apic = vcpu->arch.apic;
2645
2646         if (atomic_read(&apic->lapic_timer.pending) > 0) {
2647                 kvm_apic_inject_pending_timer_irqs(apic);
2648                 atomic_set(&apic->lapic_timer.pending, 0);
2649         }
2650 }
2651
2652 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
2653 {
2654         int vector = kvm_apic_has_interrupt(vcpu);
2655         struct kvm_lapic *apic = vcpu->arch.apic;
2656         u32 ppr;
2657
2658         if (vector == -1)
2659                 return -1;
2660
2661         /*
2662          * We get here even with APIC virtualization enabled, if doing
2663          * nested virtualization and L1 runs with the "acknowledge interrupt
2664          * on exit" mode.  Then we cannot inject the interrupt via RVI,
2665          * because the process would deliver it through the IDT.
2666          */
2667
2668         apic_clear_irr(vector, apic);
2669         if (to_hv_vcpu(vcpu) && test_bit(vector, to_hv_synic(vcpu)->auto_eoi_bitmap)) {
2670                 /*
2671                  * For auto-EOI interrupts, there might be another pending
2672                  * interrupt above PPR, so check whether to raise another
2673                  * KVM_REQ_EVENT.
2674                  */
2675                 apic_update_ppr(apic);
2676         } else {
2677                 /*
2678                  * For normal interrupts, PPR has been raised and there cannot
2679                  * be a higher-priority pending interrupt---except if there was
2680                  * a concurrent interrupt injection, but that would have
2681                  * triggered KVM_REQ_EVENT already.
2682                  */
2683                 apic_set_isr(vector, apic);
2684                 __apic_update_ppr(apic, &ppr);
2685         }
2686
2687         return vector;
2688 }
2689
2690 static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
2691                 struct kvm_lapic_state *s, bool set)
2692 {
2693         if (apic_x2apic_mode(vcpu->arch.apic)) {
2694                 u32 *id = (u32 *)(s->regs + APIC_ID);
2695                 u32 *ldr = (u32 *)(s->regs + APIC_LDR);
2696                 u64 icr;
2697
2698                 if (vcpu->kvm->arch.x2apic_format) {
2699                         if (*id != vcpu->vcpu_id)
2700                                 return -EINVAL;
2701                 } else {
2702                         if (set)
2703                                 *id >>= 24;
2704                         else
2705                                 *id <<= 24;
2706                 }
2707
2708                 /*
2709                  * In x2APIC mode, the LDR is fixed and based on the id.  And
2710                  * ICR is internally a single 64-bit register, but needs to be
2711                  * split to ICR+ICR2 in userspace for backwards compatibility.
2712                  */
2713                 if (set) {
2714                         *ldr = kvm_apic_calc_x2apic_ldr(*id);
2715
2716                         icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
2717                               (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
2718                         __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
2719                 } else {
2720                         icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
2721                         __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
2722                 }
2723         } else {
2724                 kvm_lapic_xapic_id_updated(vcpu->arch.apic);
2725         }
2726
2727         return 0;
2728 }
2729
2730 int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2731 {
2732         memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
2733
2734         /*
2735          * Get calculated timer current count for remaining timer period (if
2736          * any) and store it in the returned register set.
2737          */
2738         __kvm_lapic_set_reg(s->regs, APIC_TMCCT,
2739                             __apic_read(vcpu->arch.apic, APIC_TMCCT));
2740
2741         return kvm_apic_state_fixup(vcpu, s, false);
2742 }
2743
2744 int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2745 {
2746         struct kvm_lapic *apic = vcpu->arch.apic;
2747         int r;
2748
2749         kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
2750         /* set SPIV separately to get count of SW disabled APICs right */
2751         apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
2752
2753         r = kvm_apic_state_fixup(vcpu, s, true);
2754         if (r) {
2755                 kvm_recalculate_apic_map(vcpu->kvm);
2756                 return r;
2757         }
2758         memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
2759
2760         atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
2761         kvm_recalculate_apic_map(vcpu->kvm);
2762         kvm_apic_set_version(vcpu);
2763
2764         apic_update_ppr(apic);
2765         cancel_apic_timer(apic);
2766         apic->lapic_timer.expired_tscdeadline = 0;
2767         apic_update_lvtt(apic);
2768         apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2769         update_divide_count(apic);
2770         __start_apic_timer(apic, APIC_TMCCT);
2771         kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
2772         kvm_apic_update_apicv(vcpu);
2773         apic->highest_isr_cache = -1;
2774         if (apic->apicv_active) {
2775                 static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
2776                 static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic));
2777                 static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic));
2778         }
2779         kvm_make_request(KVM_REQ_EVENT, vcpu);
2780         if (ioapic_in_kernel(vcpu->kvm))
2781                 kvm_rtc_eoi_tracking_restore_one(vcpu);
2782
2783         vcpu->arch.apic_arb_prio = 0;
2784
2785         return 0;
2786 }
2787
2788 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
2789 {
2790         struct hrtimer *timer;
2791
2792         if (!lapic_in_kernel(vcpu) ||
2793                 kvm_can_post_timer_interrupt(vcpu))
2794                 return;
2795
2796         timer = &vcpu->arch.apic->lapic_timer.timer;
2797         if (hrtimer_cancel(timer))
2798                 hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
2799 }
2800
2801 /*
2802  * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
2803  *
2804  * Detect whether guest triggered PV EOI since the
2805  * last entry. If yes, set EOI on guests's behalf.
2806  * Clear PV EOI in guest memory in any case.
2807  */
2808 static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
2809                                         struct kvm_lapic *apic)
2810 {
2811         int vector;
2812         /*
2813          * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
2814          * and KVM_PV_EOI_ENABLED in guest memory as follows:
2815          *
2816          * KVM_APIC_PV_EOI_PENDING is unset:
2817          *      -> host disabled PV EOI.
2818          * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
2819          *      -> host enabled PV EOI, guest did not execute EOI yet.
2820          * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
2821          *      -> host enabled PV EOI, guest executed EOI.
2822          */
2823         BUG_ON(!pv_eoi_enabled(vcpu));
2824
2825         if (pv_eoi_test_and_clr_pending(vcpu))
2826                 return;
2827         vector = apic_set_eoi(apic);
2828         trace_kvm_pv_eoi(apic, vector);
2829 }
2830
2831 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
2832 {
2833         u32 data;
2834
2835         if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
2836                 apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
2837
2838         if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2839                 return;
2840
2841         if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2842                                   sizeof(u32)))
2843                 return;
2844
2845         apic_set_tpr(vcpu->arch.apic, data & 0xff);
2846 }
2847
2848 /*
2849  * apic_sync_pv_eoi_to_guest - called before vmentry
2850  *
2851  * Detect whether it's safe to enable PV EOI and
2852  * if yes do so.
2853  */
2854 static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
2855                                         struct kvm_lapic *apic)
2856 {
2857         if (!pv_eoi_enabled(vcpu) ||
2858             /* IRR set or many bits in ISR: could be nested. */
2859             apic->irr_pending ||
2860             /* Cache not set: could be safe but we don't bother. */
2861             apic->highest_isr_cache == -1 ||
2862             /* Need EOI to update ioapic. */
2863             kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
2864                 /*
2865                  * PV EOI was disabled by apic_sync_pv_eoi_from_guest
2866                  * so we need not do anything here.
2867                  */
2868                 return;
2869         }
2870
2871         pv_eoi_set_pending(apic->vcpu);
2872 }
2873
2874 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
2875 {
2876         u32 data, tpr;
2877         int max_irr, max_isr;
2878         struct kvm_lapic *apic = vcpu->arch.apic;
2879
2880         apic_sync_pv_eoi_to_guest(vcpu, apic);
2881
2882         if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2883                 return;
2884
2885         tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
2886         max_irr = apic_find_highest_irr(apic);
2887         if (max_irr < 0)
2888                 max_irr = 0;
2889         max_isr = apic_find_highest_isr(apic);
2890         if (max_isr < 0)
2891                 max_isr = 0;
2892         data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
2893
2894         kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2895                                 sizeof(u32));
2896 }
2897
2898 int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
2899 {
2900         if (vapic_addr) {
2901                 if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2902                                         &vcpu->arch.apic->vapic_cache,
2903                                         vapic_addr, sizeof(u32)))
2904                         return -EINVAL;
2905                 __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2906         } else {
2907                 __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2908         }
2909
2910         vcpu->arch.apic->vapic_addr = vapic_addr;
2911         return 0;
2912 }
2913
2914 int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
2915 {
2916         data &= ~APIC_ICR_BUSY;
2917
2918         kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
2919         kvm_lapic_set_reg64(apic, APIC_ICR, data);
2920         trace_kvm_apic_write(APIC_ICR, data);
2921         return 0;
2922 }
2923
2924 static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
2925 {
2926         u32 low;
2927
2928         if (reg == APIC_ICR) {
2929                 *data = kvm_lapic_get_reg64(apic, APIC_ICR);
2930                 return 0;
2931         }
2932
2933         if (kvm_lapic_reg_read(apic, reg, 4, &low))
2934                 return 1;
2935
2936         *data = low;
2937
2938         return 0;
2939 }
2940
2941 static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data)
2942 {
2943         /*
2944          * ICR is a 64-bit register in x2APIC mode (and Hyper'v PV vAPIC) and
2945          * can be written as such, all other registers remain accessible only
2946          * through 32-bit reads/writes.
2947          */
2948         if (reg == APIC_ICR)
2949                 return kvm_x2apic_icr_write(apic, data);
2950
2951         return kvm_lapic_reg_write(apic, reg, (u32)data);
2952 }
2953
2954 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
2955 {
2956         struct kvm_lapic *apic = vcpu->arch.apic;
2957         u32 reg = (msr - APIC_BASE_MSR) << 4;
2958
2959         if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2960                 return 1;
2961
2962         return kvm_lapic_msr_write(apic, reg, data);
2963 }
2964
2965 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
2966 {
2967         struct kvm_lapic *apic = vcpu->arch.apic;
2968         u32 reg = (msr - APIC_BASE_MSR) << 4;
2969
2970         if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2971                 return 1;
2972
2973         if (reg == APIC_DFR)
2974                 return 1;
2975
2976         return kvm_lapic_msr_read(apic, reg, data);
2977 }
2978
2979 int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
2980 {
2981         if (!lapic_in_kernel(vcpu))
2982                 return 1;
2983
2984         return kvm_lapic_msr_write(vcpu->arch.apic, reg, data);
2985 }
2986
2987 int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
2988 {
2989         if (!lapic_in_kernel(vcpu))
2990                 return 1;
2991
2992         return kvm_lapic_msr_read(vcpu->arch.apic, reg, data);
2993 }
2994
2995 int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
2996 {
2997         u64 addr = data & ~KVM_MSR_ENABLED;
2998         struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
2999         unsigned long new_len;
3000         int ret;
3001
3002         if (!IS_ALIGNED(addr, 4))
3003                 return 1;
3004
3005         if (data & KVM_MSR_ENABLED) {
3006                 if (addr == ghc->gpa && len <= ghc->len)
3007                         new_len = ghc->len;
3008                 else
3009                         new_len = len;
3010
3011                 ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
3012                 if (ret)
3013                         return ret;
3014         }
3015
3016         vcpu->arch.pv_eoi.msr_val = data;
3017
3018         return 0;
3019 }
3020
3021 int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
3022 {
3023         struct kvm_lapic *apic = vcpu->arch.apic;
3024         u8 sipi_vector;
3025         int r;
3026         unsigned long pe;
3027
3028         if (!lapic_in_kernel(vcpu))
3029                 return 0;
3030
3031         /*
3032          * Read pending events before calling the check_events
3033          * callback.
3034          */
3035         pe = smp_load_acquire(&apic->pending_events);
3036         if (!pe)
3037                 return 0;
3038
3039         if (is_guest_mode(vcpu)) {
3040                 r = kvm_check_nested_events(vcpu);
3041                 if (r < 0)
3042                         return r == -EBUSY ? 0 : r;
3043                 /*
3044                  * If an event has happened and caused a vmexit,
3045                  * we know INITs are latched and therefore
3046                  * we will not incorrectly deliver an APIC
3047                  * event instead of a vmexit.
3048                  */
3049         }
3050
3051         /*
3052          * INITs are latched while CPU is in specific states
3053          * (SMM, VMX root mode, SVM with GIF=0).
3054          * Because a CPU cannot be in these states immediately
3055          * after it has processed an INIT signal (and thus in
3056          * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
3057          * and leave the INIT pending.
3058          */
3059         if (kvm_vcpu_latch_init(vcpu)) {
3060                 WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
3061                 if (test_bit(KVM_APIC_SIPI, &pe))
3062                         clear_bit(KVM_APIC_SIPI, &apic->pending_events);
3063                 return 0;
3064         }
3065
3066         if (test_bit(KVM_APIC_INIT, &pe)) {
3067                 clear_bit(KVM_APIC_INIT, &apic->pending_events);
3068                 kvm_vcpu_reset(vcpu, true);
3069                 if (kvm_vcpu_is_bsp(apic->vcpu))
3070                         vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
3071                 else
3072                         vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
3073         }
3074         if (test_bit(KVM_APIC_SIPI, &pe)) {
3075                 clear_bit(KVM_APIC_SIPI, &apic->pending_events);
3076                 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
3077                         /* evaluate pending_events before reading the vector */
3078                         smp_rmb();
3079                         sipi_vector = apic->sipi_vector;
3080                         static_call(kvm_x86_vcpu_deliver_sipi_vector)(vcpu, sipi_vector);
3081                         vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
3082                 }
3083         }
3084         return 0;
3085 }
3086
3087 void kvm_lapic_exit(void)
3088 {
3089         static_key_deferred_flush(&apic_hw_disabled);
3090         WARN_ON(static_branch_unlikely(&apic_hw_disabled.key));
3091         static_key_deferred_flush(&apic_sw_disabled);
3092         WARN_ON(static_branch_unlikely(&apic_sw_disabled.key));
3093 }