KVM: x86: Unify pr_fmt to use module name for all KVM modules
[linux-2.6-microblaze.git] arch/x86/kvm/svm/nested.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Kernel-based Virtual Machine driver for Linux
4  *
5  * AMD SVM support
6  *
7  * Copyright (C) 2006 Qumranet, Inc.
8  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
9  *
10  * Authors:
11  *   Yaniv Kamay  <yaniv@qumranet.com>
12  *   Avi Kivity   <avi@qumranet.com>
13  */
14
15 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
16
17 #include <linux/kvm_types.h>
18 #include <linux/kvm_host.h>
19 #include <linux/kernel.h>
20
21 #include <asm/msr-index.h>
22 #include <asm/debugreg.h>
23
24 #include "kvm_emulate.h"
25 #include "trace.h"
26 #include "mmu.h"
27 #include "x86.h"
28 #include "smm.h"
29 #include "cpuid.h"
30 #include "lapic.h"
31 #include "svm.h"
32 #include "hyperv.h"
33
34 #define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK
35
36 static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
37                                        struct x86_exception *fault)
38 {
39         struct vcpu_svm *svm = to_svm(vcpu);
40         struct vmcb *vmcb = svm->vmcb;
41
42         if (vmcb->control.exit_code != SVM_EXIT_NPF) {
43                 /*
44                  * TODO: track the cause of the nested page fault, and
45                  * correctly fill in the high bits of exit_info_1.
46                  */
47                 vmcb->control.exit_code = SVM_EXIT_NPF;
48                 vmcb->control.exit_code_hi = 0;
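                   /*
                    * Bits 63:32 of exit_info_1 describe the nested fault
                    * itself; the value set here is a best-effort placeholder
                    * since the real cause isn't tracked (see the TODO above).
                    * The error code is spliced into the low 32 bits below.
                    */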
49                 vmcb->control.exit_info_1 = (1ULL << 32);
50                 vmcb->control.exit_info_2 = fault->address;
51         }
52
53         vmcb->control.exit_info_1 &= ~0xffffffffULL;
54         vmcb->control.exit_info_1 |= fault->error_code;
55
56         nested_svm_vmexit(svm);
57 }
58
59 static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
60 {
61         struct vcpu_svm *svm = to_svm(vcpu);
62         u64 cr3 = svm->nested.ctl.nested_cr3;
63         u64 pdpte;
64         int ret;
65
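           /*
            * Read the index-th 8-byte PDPTE of the PAE page-directory-pointer
            * table that L1's nested_cr3 points at; the 32-byte table never
            * crosses a page boundary.
            */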
66         ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
67                                        offset_in_page(cr3) + index * 8, 8);
68         if (ret)
69                 return 0;
70         return pdpte;
71 }
72
73 static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
74 {
75         struct vcpu_svm *svm = to_svm(vcpu);
76
77         return svm->nested.ctl.nested_cr3;
78 }
79
80 static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
81 {
82         struct vcpu_svm *svm = to_svm(vcpu);
83
84         WARN_ON(mmu_is_nested(vcpu));
85
86         vcpu->arch.mmu = &vcpu->arch.guest_mmu;
87
88         /*
89          * The NPT format depends on L1's CR4 and EFER, which are in vmcb01.  Note,
90          * when called via KVM_SET_NESTED_STATE, that state may _not_ match current
91          * vCPU state.  CR0.WP is explicitly ignored, while CR0.PG is required.
92          */
93         kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, svm->vmcb01.ptr->save.cr4,
94                                 svm->vmcb01.ptr->save.efer,
95                                 svm->nested.ctl.nested_cr3);
96         vcpu->arch.mmu->get_guest_pgd     = nested_svm_get_tdp_cr3;
97         vcpu->arch.mmu->get_pdptr         = nested_svm_get_tdp_pdptr;
98         vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
99         vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
100 }
101
102 static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
103 {
104         vcpu->arch.mmu = &vcpu->arch.root_mmu;
105         vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
106 }
107
108 static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm)
109 {
110         if (!svm->v_vmload_vmsave_enabled)
111                 return true;
112
113         if (!nested_npt_enabled(svm))
114                 return true;
115
116         if (!(svm->nested.ctl.virt_ext & VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK))
117                 return true;
118
119         return false;
120 }
121
122 void recalc_intercepts(struct vcpu_svm *svm)
123 {
124         struct vmcb_control_area *c, *h;
125         struct vmcb_ctrl_area_cached *g;
126         unsigned int i;
127
128         vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
129
130         if (!is_guest_mode(&svm->vcpu))
131                 return;
132
133         c = &svm->vmcb->control;
134         h = &svm->vmcb01.ptr->control;
135         g = &svm->nested.ctl;
136
137         for (i = 0; i < MAX_INTERCEPT; i++)
138                 c->intercepts[i] = h->intercepts[i];
139
140         if (g->int_ctl & V_INTR_MASKING_MASK) {
141                 /* We only want the cr8 intercept bits of L1 */
142                 vmcb_clr_intercept(c, INTERCEPT_CR8_READ);
143                 vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);
144
145                 /*
146                  * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
147                  * affect any interrupt we may want to inject; therefore,
148                  * interrupt window vmexits are irrelevant to L0.
149                  */
150                 vmcb_clr_intercept(c, INTERCEPT_VINTR);
151         }
152
153         /*
154          * We want to see VMMCALLs from a nested guest only when the Hyper-V L2 TLB
155          * flush feature is enabled.
156          */
157         if (!nested_svm_l2_tlb_flush_enabled(&svm->vcpu))
158                 vmcb_clr_intercept(c, INTERCEPT_VMMCALL);
159
160         for (i = 0; i < MAX_INTERCEPT; i++)
161                 c->intercepts[i] |= g->intercepts[i];
162
163         /* If SMI is not intercepted, ignore guest SMI intercept as well  */
164         if (!intercept_smi)
165                 vmcb_clr_intercept(c, INTERCEPT_SMI);
166
167         if (nested_vmcb_needs_vls_intercept(svm)) {
168                 /*
169                  * If the virtual VMLOAD/VMSAVE is not enabled for the L2,
170                  * we must intercept these instructions to correctly
171                  * emulate them in case L1 doesn't intercept them.
172                  */
173                 vmcb_set_intercept(c, INTERCEPT_VMLOAD);
174                 vmcb_set_intercept(c, INTERCEPT_VMSAVE);
175         } else {
176                 WARN_ON(!(c->virt_ext & VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK));
177         }
178 }
179
180 /*
181  * Merge L0's (KVM) and L1's (Nested VMCB) MSR permission bitmaps. The function
182  * is optimized in that it only merges the parts where the KVM MSR permission bitmap
183  * may contain zero bits.
184  */
185 static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
186 {
187         struct hv_vmcb_enlightenments *hve = &svm->nested.ctl.hv_enlightenments;
188         int i;
189
190         /*
191          * MSR bitmap update can be skipped when:
192          * - MSR bitmap for L1 hasn't changed.
193          * - Nested hypervisor (L1) is attempting to launch the same L2 as
194          *   before.
195          * - Nested hypervisor (L1) is using the Hyper-V emulation interface and
196          *   tells KVM (L0) there were no changes in the MSR bitmap for L2.
197          */
198         if (!svm->nested.force_msr_bitmap_recalc &&
199             kvm_hv_hypercall_enabled(&svm->vcpu) &&
200             hve->hv_enlightenments_control.msr_bitmap &&
201             (svm->nested.ctl.clean & BIT(HV_VMCB_NESTED_ENLIGHTENMENTS)))
202                 goto set_msrpm_base_pa;
203
204         if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
205                 return true;
206
207         for (i = 0; i < MSRPM_OFFSETS; i++) {
208                 u32 value, p;
209                 u64 offset;
210
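                    /*
                     * msrpm_offsets[] is terminated by an all-ones entry and
                     * lists only the u32 chunks covering MSRs that KVM itself
                     * may leave unintercepted; everything else is already
                     * intercepted and needs no merging.
                     */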
211                 if (msrpm_offsets[i] == 0xffffffff)
212                         break;
213
214                 p      = msrpm_offsets[i];
215
216                 /* x2APIC MSRs are always intercepted for the nested guest */
217                 if (is_x2apic_msrpm_offset(p))
218                         continue;
219
220                 offset = svm->nested.ctl.msrpm_base_pa + (p * 4);
221
222                 if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
223                         return false;
224
225                 svm->nested.msrpm[p] = svm->msrpm[p] | value;
226         }
227
228         svm->nested.force_msr_bitmap_recalc = false;
229
230 set_msrpm_base_pa:
231         svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));
232
233         return true;
234 }
235
236 /*
237  * Bits 11:0 of the bitmap address are ignored by hardware
238  */
239 static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
240 {
241         u64 addr = PAGE_ALIGN(pa);
242
243         return kvm_vcpu_is_legal_gpa(vcpu, addr) &&
244             kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
245 }
246
247 static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl)
248 {
249         /* Nested FLUSHBYASID is not supported yet.  */
250         switch(tlb_ctl) {
251                 case TLB_CONTROL_DO_NOTHING:
252                 case TLB_CONTROL_FLUSH_ALL_ASID:
253                         return true;
254                 default:
255                         return false;
256         }
257 }
258
259 static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
260                                          struct vmcb_ctrl_area_cached *control)
261 {
262         if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN)))
263                 return false;
264
265         if (CC(control->asid == 0))
266                 return false;
267
268         if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled))
269                 return false;
270
271         if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
272                                            MSRPM_SIZE)))
273                 return false;
274         if (CC(!nested_svm_check_bitmap_pa(vcpu, control->iopm_base_pa,
275                                            IOPM_SIZE)))
276                 return false;
277
278         if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl)))
279                 return false;
280
281         return true;
282 }
283
284 /* Common checks that apply to both L1 and L2 state.  */
285 static bool __nested_vmcb_check_save(struct kvm_vcpu *vcpu,
286                                      struct vmcb_save_area_cached *save)
287 {
288         if (CC(!(save->efer & EFER_SVME)))
289                 return false;
290
291         if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) ||
292             CC(save->cr0 & ~0xffffffffULL))
293                 return false;
294
295         if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7)))
296                 return false;
297
298         /*
299          * These checks are also performed by KVM_SET_SREGS,
300          * except that EFER.LMA is not checked by SVM against
301          * CR0.PG && EFER.LME.
302          */
303         if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) {
304                 if (CC(!(save->cr4 & X86_CR4_PAE)) ||
305                     CC(!(save->cr0 & X86_CR0_PE)) ||
306                     CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3)))
307                         return false;
308         }
309
310         /* Note, SVM doesn't have any additional restrictions on CR4. */
311         if (CC(!__kvm_is_valid_cr4(vcpu, save->cr4)))
312                 return false;
313
314         if (CC(!kvm_valid_efer(vcpu, save->efer)))
315                 return false;
316
317         return true;
318 }
319
320 static bool nested_vmcb_check_save(struct kvm_vcpu *vcpu)
321 {
322         struct vcpu_svm *svm = to_svm(vcpu);
323         struct vmcb_save_area_cached *save = &svm->nested.save;
324
325         return __nested_vmcb_check_save(vcpu, save);
326 }
327
328 static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
329 {
330         struct vcpu_svm *svm = to_svm(vcpu);
331         struct vmcb_ctrl_area_cached *ctl = &svm->nested.ctl;
332
333         return __nested_vmcb_check_controls(vcpu, ctl);
334 }
335
336 static
337 void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
338                                          struct vmcb_ctrl_area_cached *to,
339                                          struct vmcb_control_area *from)
340 {
341         unsigned int i;
342
343         for (i = 0; i < MAX_INTERCEPT; i++)
344                 to->intercepts[i] = from->intercepts[i];
345
346         to->iopm_base_pa        = from->iopm_base_pa;
347         to->msrpm_base_pa       = from->msrpm_base_pa;
348         to->tsc_offset          = from->tsc_offset;
349         to->tlb_ctl             = from->tlb_ctl;
350         to->int_ctl             = from->int_ctl;
351         to->int_vector          = from->int_vector;
352         to->int_state           = from->int_state;
353         to->exit_code           = from->exit_code;
354         to->exit_code_hi        = from->exit_code_hi;
355         to->exit_info_1         = from->exit_info_1;
356         to->exit_info_2         = from->exit_info_2;
357         to->exit_int_info       = from->exit_int_info;
358         to->exit_int_info_err   = from->exit_int_info_err;
359         to->nested_ctl          = from->nested_ctl;
360         to->event_inj           = from->event_inj;
361         to->event_inj_err       = from->event_inj_err;
362         to->next_rip            = from->next_rip;
363         to->nested_cr3          = from->nested_cr3;
364         to->virt_ext            = from->virt_ext;
365         to->pause_filter_count  = from->pause_filter_count;
366         to->pause_filter_thresh = from->pause_filter_thresh;
367
368         /* Copy asid here because nested_vmcb_check_controls will check it.  */
369         to->asid           = from->asid;
370         to->msrpm_base_pa &= ~0x0fffULL;
371         to->iopm_base_pa  &= ~0x0fffULL;
372
373         /* Hyper-V extensions (Enlightened VMCB) */
374         if (kvm_hv_hypercall_enabled(vcpu)) {
375                 to->clean = from->clean;
376                 memcpy(&to->hv_enlightenments, &from->hv_enlightenments,
377                        sizeof(to->hv_enlightenments));
378         }
379 }
380
381 void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
382                                        struct vmcb_control_area *control)
383 {
384         __nested_copy_vmcb_control_to_cache(&svm->vcpu, &svm->nested.ctl, control);
385 }
386
387 static void __nested_copy_vmcb_save_to_cache(struct vmcb_save_area_cached *to,
388                                              struct vmcb_save_area *from)
389 {
390         /*
391          * Copy only fields that are validated, as we need them
392          * to avoid TOC/TOU races.
393          */
394         to->efer = from->efer;
395         to->cr0 = from->cr0;
396         to->cr3 = from->cr3;
397         to->cr4 = from->cr4;
398
399         to->dr6 = from->dr6;
400         to->dr7 = from->dr7;
401 }
402
403 void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
404                                     struct vmcb_save_area *save)
405 {
406         __nested_copy_vmcb_save_to_cache(&svm->nested.save, save);
407 }
408
409 /*
410  * Synchronize fields that are written by the processor, so that
411  * they can be copied back into the vmcb12.
412  */
413 void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
414 {
415         u32 mask;
416         svm->nested.ctl.event_inj      = svm->vmcb->control.event_inj;
417         svm->nested.ctl.event_inj_err  = svm->vmcb->control.event_inj_err;
418
419         /* Only a few fields of int_ctl are written by the processor.  */
420         mask = V_IRQ_MASK | V_TPR_MASK;
421         if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
422             svm_is_intercept(svm, INTERCEPT_VINTR)) {
423                 /*
424                  * In order to request an interrupt window, L0 is usurping
425                  * svm->vmcb->control.int_ctl and possibly setting V_IRQ
426                  * even if it was clear in L1's VMCB.  Restoring it would be
427                  * wrong.  However, in this case V_IRQ will remain true until
428                  * interrupt_window_interception calls svm_clear_vintr and
429                  * restores int_ctl.  We can just leave it aside.
430                  */
431                 mask &= ~V_IRQ_MASK;
432         }
433
434         if (nested_vgif_enabled(svm))
435                 mask |= V_GIF_MASK;
436
437         svm->nested.ctl.int_ctl        &= ~mask;
438         svm->nested.ctl.int_ctl        |= svm->vmcb->control.int_ctl & mask;
439 }
440
441 /*
442  * Transfer any event that L0 or L1 wanted to inject into L2 to
443  * EXIT_INT_INFO.
444  */
445 static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm,
446                                                 struct vmcb *vmcb12)
447 {
448         struct kvm_vcpu *vcpu = &svm->vcpu;
449         u32 exit_int_info = 0;
450         unsigned int nr;
451
452         if (vcpu->arch.exception.injected) {
453                 nr = vcpu->arch.exception.vector;
454                 exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
455
456                 if (vcpu->arch.exception.has_error_code) {
457                         exit_int_info |= SVM_EVTINJ_VALID_ERR;
458                         vmcb12->control.exit_int_info_err =
459                                 vcpu->arch.exception.error_code;
460                 }
461
462         } else if (vcpu->arch.nmi_injected) {
463                 exit_int_info = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
464
465         } else if (vcpu->arch.interrupt.injected) {
466                 nr = vcpu->arch.interrupt.nr;
467                 exit_int_info = nr | SVM_EVTINJ_VALID;
468
469                 if (vcpu->arch.interrupt.soft)
470                         exit_int_info |= SVM_EVTINJ_TYPE_SOFT;
471                 else
472                         exit_int_info |= SVM_EVTINJ_TYPE_INTR;
473         }
474
475         vmcb12->control.exit_int_info = exit_int_info;
476 }
477
478 static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu)
479 {
480         /*
481          * KVM_REQ_HV_TLB_FLUSH flushes entries from either L1's VP_ID or
482          * L2's VP_ID upon request from the guest. Make sure we check for
483          * pending entries in the right FIFO upon L1/L2 transition as these
484          * requests are put by other vCPUs asynchronously.
485          */
486         if (to_hv_vcpu(vcpu) && npt_enabled)
487                 kvm_make_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
488
489         /*
490          * TODO: optimize unconditional TLB flush/MMU sync.  A partial list of
491          * things to fix before this can be conditional:
492          *
493          *  - Flush TLBs for both L1 and L2 remote TLB flush
494          *  - Honor L1's request to flush an ASID on nested VMRUN
495          *  - Sync nested NPT MMU on VMRUN that flushes L2's ASID[*]
496          *  - Don't crush a pending TLB flush in vmcb02 on nested VMRUN
497          *  - Flush L1's ASID on KVM_REQ_TLB_FLUSH_GUEST
498          *
499          * [*] Unlike nested EPT, SVM's ASID management can invalidate nested
500          *     NPT guest-physical mappings on VMRUN.
501          */
502         kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
503         kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
504 }
505
506 /*
507  * Load guest's/host's cr3 on nested vmentry or vmexit. @nested_npt is true
508  * if we are emulating VM-Entry into a guest with NPT enabled.
509  */
510 static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
511                                bool nested_npt, bool reload_pdptrs)
512 {
513         if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3)))
514                 return -EINVAL;
515
516         if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) &&
517             CC(!load_pdptrs(vcpu, cr3)))
518                 return -EINVAL;
519
520         vcpu->arch.cr3 = cr3;
521
522         /* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
523         kvm_init_mmu(vcpu);
524
525         if (!nested_npt)
526                 kvm_mmu_new_pgd(vcpu, cr3);
527
528         return 0;
529 }
530
531 void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm)
532 {
533         if (!svm->nested.vmcb02.ptr)
534                 return;
535
536         /* FIXME: merge g_pat from vmcb01 and vmcb12.  */
537         svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat;
538 }
539
540 static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
541 {
542         bool new_vmcb12 = false;
543         struct vmcb *vmcb01 = svm->vmcb01.ptr;
544         struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
545
546         nested_vmcb02_compute_g_pat(svm);
547
548         /* Load the nested guest state */
549         if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) {
550                 new_vmcb12 = true;
551                 svm->nested.last_vmcb12_gpa = svm->nested.vmcb12_gpa;
552                 svm->nested.force_msr_bitmap_recalc = true;
553         }
554
555         if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_SEG))) {
556                 vmcb02->save.es = vmcb12->save.es;
557                 vmcb02->save.cs = vmcb12->save.cs;
558                 vmcb02->save.ss = vmcb12->save.ss;
559                 vmcb02->save.ds = vmcb12->save.ds;
560                 vmcb02->save.cpl = vmcb12->save.cpl;
561                 vmcb_mark_dirty(vmcb02, VMCB_SEG);
562         }
563
564         if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DT))) {
565                 vmcb02->save.gdtr = vmcb12->save.gdtr;
566                 vmcb02->save.idtr = vmcb12->save.idtr;
567                 vmcb_mark_dirty(vmcb02, VMCB_DT);
568         }
569
570         kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
571
572         svm_set_efer(&svm->vcpu, svm->nested.save.efer);
573
574         svm_set_cr0(&svm->vcpu, svm->nested.save.cr0);
575         svm_set_cr4(&svm->vcpu, svm->nested.save.cr4);
576
577         svm->vcpu.arch.cr2 = vmcb12->save.cr2;
578
579         kvm_rax_write(&svm->vcpu, vmcb12->save.rax);
580         kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp);
581         kvm_rip_write(&svm->vcpu, vmcb12->save.rip);
582
583         /* In case we don't even reach vcpu_run, the fields are not updated */
584         vmcb02->save.rax = vmcb12->save.rax;
585         vmcb02->save.rsp = vmcb12->save.rsp;
586         vmcb02->save.rip = vmcb12->save.rip;
587
588         /* These bits will be set properly on the first execution when new_vmcb12 is true */
589         if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
590                 vmcb02->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
591                 svm->vcpu.arch.dr6  = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
592                 vmcb_mark_dirty(vmcb02, VMCB_DR);
593         }
594
595         if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
596                 /*
597                  * Reserved bits of DEBUGCTL are ignored.  Be consistent with
598                  * svm_set_msr's definition of reserved bits.
599                  */
600                 svm_copy_lbrs(vmcb02, vmcb12);
601                 vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
602                 svm_update_lbrv(&svm->vcpu);
603
604         } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
605                 svm_copy_lbrs(vmcb02, vmcb01);
606         }
607 }
608
609 static inline bool is_evtinj_soft(u32 evtinj)
610 {
611         u32 type = evtinj & SVM_EVTINJ_TYPE_MASK;
612         u8 vector = evtinj & SVM_EVTINJ_VEC_MASK;
613
614         if (!(evtinj & SVM_EVTINJ_VALID))
615                 return false;
616
617         if (type == SVM_EVTINJ_TYPE_SOFT)
618                 return true;
619
620         return type == SVM_EVTINJ_TYPE_EXEPT && kvm_exception_is_soft(vector);
621 }
622
623 static bool is_evtinj_nmi(u32 evtinj)
624 {
625         u32 type = evtinj & SVM_EVTINJ_TYPE_MASK;
626
627         if (!(evtinj & SVM_EVTINJ_VALID))
628                 return false;
629
630         return type == SVM_EVTINJ_TYPE_NMI;
631 }
632
633 static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
634                                           unsigned long vmcb12_rip,
635                                           unsigned long vmcb12_csbase)
636 {
637         u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
638         u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
639
640         struct kvm_vcpu *vcpu = &svm->vcpu;
641         struct vmcb *vmcb01 = svm->vmcb01.ptr;
642         struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
643         u32 pause_count12;
644         u32 pause_thresh12;
645
646         /*
647          * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
648          * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
649          */
650
651         if (svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
652                 int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
653         else
654                 int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
655
656         /* Copied from vmcb01.  msrpm_base can be overwritten later.  */
657         vmcb02->control.nested_ctl = vmcb01->control.nested_ctl;
658         vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
659         vmcb02->control.msrpm_base_pa = vmcb01->control.msrpm_base_pa;
660
661         /* Done at vmrun: asid.  */
662
663         /* Also overwritten later if necessary.  */
664         vmcb02->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
665
666         /* nested_cr3.  */
667         if (nested_npt_enabled(svm))
668                 nested_svm_init_mmu_context(vcpu);
669
670         vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
671                         vcpu->arch.l1_tsc_offset,
672                         svm->nested.ctl.tsc_offset,
673                         svm->tsc_ratio_msr);
674
675         vmcb02->control.tsc_offset = vcpu->arch.tsc_offset;
676
677         if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
678                 WARN_ON(!svm->tsc_scaling_enabled);
679                 nested_svm_update_tsc_ratio_msr(vcpu);
680         }
681
682         vmcb02->control.int_ctl             =
683                 (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
684                 (vmcb01->control.int_ctl & int_ctl_vmcb01_bits);
685
686         vmcb02->control.int_vector          = svm->nested.ctl.int_vector;
687         vmcb02->control.int_state           = svm->nested.ctl.int_state;
688         vmcb02->control.event_inj           = svm->nested.ctl.event_inj;
689         vmcb02->control.event_inj_err       = svm->nested.ctl.event_inj_err;
690
691         /*
692          * next_rip is consumed on VMRUN as the return address pushed on the
693          * stack for injected soft exceptions/interrupts.  If nrips is exposed
694          * to L1, take it verbatim from vmcb12.  If nrips is supported in
695          * hardware but not exposed to L1, stuff the actual L2 RIP to emulate
696          * what a nrips=0 CPU would do (L1 is responsible for advancing RIP
697          * prior to injecting the event).
698          */
699         if (svm->nrips_enabled)
700                 vmcb02->control.next_rip    = svm->nested.ctl.next_rip;
701         else if (boot_cpu_has(X86_FEATURE_NRIPS))
702                 vmcb02->control.next_rip    = vmcb12_rip;
703
704         svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
705         if (is_evtinj_soft(vmcb02->control.event_inj)) {
706                 svm->soft_int_injected = true;
707                 svm->soft_int_csbase = vmcb12_csbase;
708                 svm->soft_int_old_rip = vmcb12_rip;
709                 if (svm->nrips_enabled)
710                         svm->soft_int_next_rip = svm->nested.ctl.next_rip;
711                 else
712                         svm->soft_int_next_rip = vmcb12_rip;
713         }
714
715         vmcb02->control.virt_ext            = vmcb01->control.virt_ext &
716                                               LBR_CTL_ENABLE_MASK;
717         if (svm->lbrv_enabled)
718                 vmcb02->control.virt_ext  |=
719                         (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
720
721         if (!nested_vmcb_needs_vls_intercept(svm))
722                 vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
723
724         pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0;
725         pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0;
726         if (kvm_pause_in_guest(svm->vcpu.kvm)) {
727                 /* use guest values since host doesn't intercept PAUSE */
728                 vmcb02->control.pause_filter_count = pause_count12;
729                 vmcb02->control.pause_filter_thresh = pause_thresh12;
730
731         } else {
732                 /* start from host values otherwise */
733                 vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
734                 vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;
735
736                 /* ... but ensure filtering is disabled if so requested.  */
737                 if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
738                         if (!pause_count12)
739                                 vmcb02->control.pause_filter_count = 0;
740                         if (!pause_thresh12)
741                                 vmcb02->control.pause_filter_thresh = 0;
742                 }
743         }
744
745         nested_svm_transition_tlb_flush(vcpu);
746
747         /* Enter Guest-Mode */
748         enter_guest_mode(vcpu);
749
750         /*
751          * Merge guest and host intercepts - must be called with vcpu in
752          * guest-mode to take effect.
753          */
754         recalc_intercepts(svm);
755 }
756
757 static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
758 {
759         /*
760          * Some VMCB state is shared between L1 and L2 and thus has to be
761          * moved at the time of nested vmrun and vmexit.
762          *
763          * VMLOAD/VMSAVE state would also belong in this category, but KVM
764          * always performs VMLOAD and VMSAVE from the VMCB01.
765          */
766         to_vmcb->save.spec_ctrl = from_vmcb->save.spec_ctrl;
767 }
768
769 int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
770                          struct vmcb *vmcb12, bool from_vmrun)
771 {
772         struct vcpu_svm *svm = to_svm(vcpu);
773         int ret;
774
775         trace_kvm_nested_vmenter(svm->vmcb->save.rip,
776                                  vmcb12_gpa,
777                                  vmcb12->save.rip,
778                                  vmcb12->control.int_ctl,
779                                  vmcb12->control.event_inj,
780                                  vmcb12->control.nested_ctl,
781                                  vmcb12->control.nested_cr3,
782                                  vmcb12->save.cr3,
783                                  KVM_ISA_SVM);
784
785         trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff,
786                                     vmcb12->control.intercepts[INTERCEPT_CR] >> 16,
787                                     vmcb12->control.intercepts[INTERCEPT_EXCEPTION],
788                                     vmcb12->control.intercepts[INTERCEPT_WORD3],
789                                     vmcb12->control.intercepts[INTERCEPT_WORD4],
790                                     vmcb12->control.intercepts[INTERCEPT_WORD5]);
791
792
793         svm->nested.vmcb12_gpa = vmcb12_gpa;
794
795         WARN_ON(svm->vmcb == svm->nested.vmcb02.ptr);
796
797         nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
798
799         svm_switch_vmcb(svm, &svm->nested.vmcb02);
800         nested_vmcb02_prepare_control(svm, vmcb12->save.rip, vmcb12->save.cs.base);
801         nested_vmcb02_prepare_save(svm, vmcb12);
802
803         ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
804                                   nested_npt_enabled(svm), from_vmrun);
805         if (ret)
806                 return ret;
807
808         if (!from_vmrun)
809                 kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
810
811         svm_set_gif(svm, true);
812
813         if (kvm_vcpu_apicv_active(vcpu))
814                 kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
815
816         nested_svm_hv_update_vm_vp_ids(vcpu);
817
818         return 0;
819 }
820
821 int nested_svm_vmrun(struct kvm_vcpu *vcpu)
822 {
823         struct vcpu_svm *svm = to_svm(vcpu);
824         int ret;
825         struct vmcb *vmcb12;
826         struct kvm_host_map map;
827         u64 vmcb12_gpa;
828         struct vmcb *vmcb01 = svm->vmcb01.ptr;
829
830         if (!svm->nested.hsave_msr) {
831                 kvm_inject_gp(vcpu, 0);
832                 return 1;
833         }
834
835         if (is_smm(vcpu)) {
836                 kvm_queue_exception(vcpu, UD_VECTOR);
837                 return 1;
838         }
839
840         /* This fails when VP assist page is enabled but the supplied GPA is bogus */
841         ret = kvm_hv_verify_vp_assist(vcpu);
842         if (ret) {
843                 kvm_inject_gp(vcpu, 0);
844                 return ret;
845         }
846
847         vmcb12_gpa = svm->vmcb->save.rax;
848         ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map);
849         if (ret == -EINVAL) {
850                 kvm_inject_gp(vcpu, 0);
851                 return 1;
852         } else if (ret) {
853                 return kvm_skip_emulated_instruction(vcpu);
854         }
855
856         ret = kvm_skip_emulated_instruction(vcpu);
857
858         vmcb12 = map.hva;
859
860         if (WARN_ON_ONCE(!svm->nested.initialized))
861                 return -EINVAL;
862
863         nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
864         nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
865
866         if (!nested_vmcb_check_save(vcpu) ||
867             !nested_vmcb_check_controls(vcpu)) {
868                 vmcb12->control.exit_code    = SVM_EXIT_ERR;
869                 vmcb12->control.exit_code_hi = 0;
870                 vmcb12->control.exit_info_1  = 0;
871                 vmcb12->control.exit_info_2  = 0;
872                 goto out;
873         }
874
875         /*
876          * Since vmcb01 is not in use, we can use it to store some of the L1
877          * state.
878          */
879         vmcb01->save.efer   = vcpu->arch.efer;
880         vmcb01->save.cr0    = kvm_read_cr0(vcpu);
881         vmcb01->save.cr4    = vcpu->arch.cr4;
882         vmcb01->save.rflags = kvm_get_rflags(vcpu);
883         vmcb01->save.rip    = kvm_rip_read(vcpu);
884
885         if (!npt_enabled)
886                 vmcb01->save.cr3 = kvm_read_cr3(vcpu);
887
888         svm->nested.nested_run_pending = 1;
889
890         if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, true))
891                 goto out_exit_err;
892
893         if (nested_svm_vmrun_msrpm(svm))
894                 goto out;
895
896 out_exit_err:
897         svm->nested.nested_run_pending = 0;
898         svm->nmi_l1_to_l2 = false;
899         svm->soft_int_injected = false;
900
901         svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
902         svm->vmcb->control.exit_code_hi = 0;
903         svm->vmcb->control.exit_info_1  = 0;
904         svm->vmcb->control.exit_info_2  = 0;
905
906         nested_svm_vmexit(svm);
907
908 out:
909         kvm_vcpu_unmap(vcpu, &map, true);
910
911         return ret;
912 }
913
914 /* Copy state save area fields which are handled by VMRUN */
915 void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
916                           struct vmcb_save_area *from_save)
917 {
918         to_save->es = from_save->es;
919         to_save->cs = from_save->cs;
920         to_save->ss = from_save->ss;
921         to_save->ds = from_save->ds;
922         to_save->gdtr = from_save->gdtr;
923         to_save->idtr = from_save->idtr;
924         to_save->rflags = from_save->rflags | X86_EFLAGS_FIXED;
925         to_save->efer = from_save->efer;
926         to_save->cr0 = from_save->cr0;
927         to_save->cr3 = from_save->cr3;
928         to_save->cr4 = from_save->cr4;
929         to_save->rax = from_save->rax;
930         to_save->rsp = from_save->rsp;
931         to_save->rip = from_save->rip;
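            /* VMRUN is only legal at CPL 0, so the host CPL saved here is always 0. */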
932         to_save->cpl = 0;
933 }
934
935 void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
936 {
937         to_vmcb->save.fs = from_vmcb->save.fs;
938         to_vmcb->save.gs = from_vmcb->save.gs;
939         to_vmcb->save.tr = from_vmcb->save.tr;
940         to_vmcb->save.ldtr = from_vmcb->save.ldtr;
941         to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
942         to_vmcb->save.star = from_vmcb->save.star;
943         to_vmcb->save.lstar = from_vmcb->save.lstar;
944         to_vmcb->save.cstar = from_vmcb->save.cstar;
945         to_vmcb->save.sfmask = from_vmcb->save.sfmask;
946         to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
947         to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
948         to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
949 }
950
951 int nested_svm_vmexit(struct vcpu_svm *svm)
952 {
953         struct kvm_vcpu *vcpu = &svm->vcpu;
954         struct vmcb *vmcb01 = svm->vmcb01.ptr;
955         struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
956         struct vmcb *vmcb12;
957         struct kvm_host_map map;
958         int rc;
959
960         rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
961         if (rc) {
962                 if (rc == -EINVAL)
963                         kvm_inject_gp(vcpu, 0);
964                 return 1;
965         }
966
967         vmcb12 = map.hva;
968
969         /* Exit Guest-Mode */
970         leave_guest_mode(vcpu);
971         svm->nested.vmcb12_gpa = 0;
972         WARN_ON_ONCE(svm->nested.nested_run_pending);
973
974         kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
975
976         /* in case we halted in L2 */
977         svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
978
979         /* Give the current vmcb to the guest */
980
981         vmcb12->save.es     = vmcb02->save.es;
982         vmcb12->save.cs     = vmcb02->save.cs;
983         vmcb12->save.ss     = vmcb02->save.ss;
984         vmcb12->save.ds     = vmcb02->save.ds;
985         vmcb12->save.gdtr   = vmcb02->save.gdtr;
986         vmcb12->save.idtr   = vmcb02->save.idtr;
987         vmcb12->save.efer   = svm->vcpu.arch.efer;
988         vmcb12->save.cr0    = kvm_read_cr0(vcpu);
989         vmcb12->save.cr3    = kvm_read_cr3(vcpu);
990         vmcb12->save.cr2    = vmcb02->save.cr2;
991         vmcb12->save.cr4    = svm->vcpu.arch.cr4;
992         vmcb12->save.rflags = kvm_get_rflags(vcpu);
993         vmcb12->save.rip    = kvm_rip_read(vcpu);
994         vmcb12->save.rsp    = kvm_rsp_read(vcpu);
995         vmcb12->save.rax    = kvm_rax_read(vcpu);
996         vmcb12->save.dr7    = vmcb02->save.dr7;
997         vmcb12->save.dr6    = svm->vcpu.arch.dr6;
998         vmcb12->save.cpl    = vmcb02->save.cpl;
999
1000         vmcb12->control.int_state         = vmcb02->control.int_state;
1001         vmcb12->control.exit_code         = vmcb02->control.exit_code;
1002         vmcb12->control.exit_code_hi      = vmcb02->control.exit_code_hi;
1003         vmcb12->control.exit_info_1       = vmcb02->control.exit_info_1;
1004         vmcb12->control.exit_info_2       = vmcb02->control.exit_info_2;
1005
1006         if (vmcb12->control.exit_code != SVM_EXIT_ERR)
1007                 nested_save_pending_event_to_vmcb12(svm, vmcb12);
1008
1009         if (svm->nrips_enabled)
1010                 vmcb12->control.next_rip  = vmcb02->control.next_rip;
1011
1012         vmcb12->control.int_ctl           = svm->nested.ctl.int_ctl;
1013         vmcb12->control.tlb_ctl           = svm->nested.ctl.tlb_ctl;
1014         vmcb12->control.event_inj         = svm->nested.ctl.event_inj;
1015         vmcb12->control.event_inj_err     = svm->nested.ctl.event_inj_err;
1016
1017         if (!kvm_pause_in_guest(vcpu->kvm)) {
1018                 vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
1019                 vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
1020
1021         }
1022
1023         nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
1024
1025         svm_switch_vmcb(svm, &svm->vmcb01);
1026
1027         if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
1028                 svm_copy_lbrs(vmcb12, vmcb02);
1029                 svm_update_lbrv(vcpu);
1030         } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
1031                 svm_copy_lbrs(vmcb01, vmcb02);
1032                 svm_update_lbrv(vcpu);
1033         }
1034
1035         /*
1036          * On vmexit the GIF is set to false and
1037          * no event can be injected in L1.
1038          */
1039         svm_set_gif(svm, false);
1040         vmcb01->control.exit_int_info = 0;
1041
1042         svm->vcpu.arch.tsc_offset = svm->vcpu.arch.l1_tsc_offset;
1043         if (vmcb01->control.tsc_offset != svm->vcpu.arch.tsc_offset) {
1044                 vmcb01->control.tsc_offset = svm->vcpu.arch.tsc_offset;
1045                 vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
1046         }
1047
1048         if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
1049                 WARN_ON(!svm->tsc_scaling_enabled);
1050                 vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
1051                 __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
1052         }
1053
1054         svm->nested.ctl.nested_cr3 = 0;
1055
1056         /*
1057          * Restore processor state that had been saved in vmcb01
1058          */
1059         kvm_set_rflags(vcpu, vmcb01->save.rflags);
1060         svm_set_efer(vcpu, vmcb01->save.efer);
1061         svm_set_cr0(vcpu, vmcb01->save.cr0 | X86_CR0_PE);
1062         svm_set_cr4(vcpu, vmcb01->save.cr4);
1063         kvm_rax_write(vcpu, vmcb01->save.rax);
1064         kvm_rsp_write(vcpu, vmcb01->save.rsp);
1065         kvm_rip_write(vcpu, vmcb01->save.rip);
1066
1067         svm->vcpu.arch.dr7 = DR7_FIXED_1;
1068         kvm_update_dr7(&svm->vcpu);
1069
1070         trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
1071                                        vmcb12->control.exit_info_1,
1072                                        vmcb12->control.exit_info_2,
1073                                        vmcb12->control.exit_int_info,
1074                                        vmcb12->control.exit_int_info_err,
1075                                        KVM_ISA_SVM);
1076
1077         kvm_vcpu_unmap(vcpu, &map, true);
1078
1079         nested_svm_transition_tlb_flush(vcpu);
1080
1081         nested_svm_uninit_mmu_context(vcpu);
1082
1083         rc = nested_svm_load_cr3(vcpu, vmcb01->save.cr3, false, true);
1084         if (rc)
1085                 return 1;
1086
1087         /*
1088          * Drop what we picked up for L2 via svm_complete_interrupts() so it
1089          * doesn't end up in L1.
1090          */
1091         svm->vcpu.arch.nmi_injected = false;
1092         kvm_clear_exception_queue(vcpu);
1093         kvm_clear_interrupt_queue(vcpu);
1094
1095         /*
1096          * If we are here following the completion of a VMRUN that
1097          * is being single-stepped, queue the pending #DB intercept
1098          * right now so that it can be accounted for before we execute
1099          * L1's next instruction.
1100          */
1101         if (unlikely(vmcb01->save.rflags & X86_EFLAGS_TF))
1102                 kvm_queue_exception(&(svm->vcpu), DB_VECTOR);
1103
1104         /*
1105          * Un-inhibit the AVIC right away, so that other vCPUs can start
1106          * to benefit from it immediately.
1107          */
1108         if (kvm_apicv_activated(vcpu->kvm))
1109                 kvm_vcpu_update_apicv(vcpu);
1110
1111         return 0;
1112 }
1113
1114 static void nested_svm_triple_fault(struct kvm_vcpu *vcpu)
1115 {
1116         struct vcpu_svm *svm = to_svm(vcpu);
1117
1118         if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SHUTDOWN))
1119                 return;
1120
1121         kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu);
1122         nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN);
1123 }
1124
1125 int svm_allocate_nested(struct vcpu_svm *svm)
1126 {
1127         struct page *vmcb02_page;
1128
1129         if (svm->nested.initialized)
1130                 return 0;
1131
1132         vmcb02_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
1133         if (!vmcb02_page)
1134                 return -ENOMEM;
1135         svm->nested.vmcb02.ptr = page_address(vmcb02_page);
1136         svm->nested.vmcb02.pa = __sme_set(page_to_pfn(vmcb02_page) << PAGE_SHIFT);
1137
1138         svm->nested.msrpm = svm_vcpu_alloc_msrpm();
1139         if (!svm->nested.msrpm)
1140                 goto err_free_vmcb02;
1141         svm_vcpu_init_msrpm(&svm->vcpu, svm->nested.msrpm);
1142
1143         svm->nested.initialized = true;
1144         return 0;
1145
1146 err_free_vmcb02:
1147         __free_page(vmcb02_page);
1148         return -ENOMEM;
1149 }
1150
1151 void svm_free_nested(struct vcpu_svm *svm)
1152 {
1153         if (!svm->nested.initialized)
1154                 return;
1155
1156         if (WARN_ON_ONCE(svm->vmcb != svm->vmcb01.ptr))
1157                 svm_switch_vmcb(svm, &svm->vmcb01);
1158
1159         svm_vcpu_free_msrpm(svm->nested.msrpm);
1160         svm->nested.msrpm = NULL;
1161
1162         __free_page(virt_to_page(svm->nested.vmcb02.ptr));
1163         svm->nested.vmcb02.ptr = NULL;
1164
1165         /*
1166          * When last_vmcb12_gpa matches the current vmcb12 gpa,
1167          * some vmcb12 fields are not loaded if they are marked clean
1168          * in the vmcb12, since in this case they are up to date already.
1169          *
1170          * When the vmcb02 is freed, this optimization becomes invalid.
1171          */
1172         svm->nested.last_vmcb12_gpa = INVALID_GPA;
1173
1174         svm->nested.initialized = false;
1175 }
1176
1177 void svm_leave_nested(struct kvm_vcpu *vcpu)
1178 {
1179         struct vcpu_svm *svm = to_svm(vcpu);
1180
1181         if (is_guest_mode(vcpu)) {
1182                 svm->nested.nested_run_pending = 0;
1183                 svm->nested.vmcb12_gpa = INVALID_GPA;
1184
1185                 leave_guest_mode(vcpu);
1186
1187                 svm_switch_vmcb(svm, &svm->vmcb01);
1188
1189                 nested_svm_uninit_mmu_context(vcpu);
1190                 vmcb_mark_all_dirty(svm->vmcb);
1191         }
1192
1193         kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
1194 }
1195
1196 static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
1197 {
1198         u32 offset, msr, value;
1199         int write, mask;
1200
1201         if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
1202                 return NESTED_EXIT_HOST;
1203
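             /*
              * The MSR permission map uses two bits per MSR (read intercept,
              * then write intercept), so each u32 chunk covers 16 MSRs:
              * "msr & 0xf" selects the MSR's slot within the chunk and
              * "write" selects the read or write bit of that slot.
              */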
1204         msr    = svm->vcpu.arch.regs[VCPU_REGS_RCX];
1205         offset = svm_msrpm_offset(msr);
1206         write  = svm->vmcb->control.exit_info_1 & 1;
1207         mask   = 1 << ((2 * (msr & 0xf)) + write);
1208
1209         if (offset == MSR_INVALID)
1210                 return NESTED_EXIT_DONE;
1211
1212         /* The offset is in 32-bit units but we need it in bytes */
1213         offset *= 4;
1214
1215         if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.ctl.msrpm_base_pa + offset, &value, 4))
1216                 return NESTED_EXIT_DONE;
1217
1218         return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
1219 }
1220
1221 static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
1222 {
1223         unsigned port, size, iopm_len;
1224         u16 val, mask;
1225         u8 start_bit;
1226         u64 gpa;
1227
1228         if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
1229                 return NESTED_EXIT_HOST;
1230
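             /*
              * The I/O permission map uses one bit per port.  An access of
              * "size" bytes starting at "port" may straddle a byte boundary,
              * in which case two bytes of the IOPM must be read, and "mask"
              * covers the contiguous run of port bits being accessed.
              */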
1231         port = svm->vmcb->control.exit_info_1 >> 16;
1232         size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
1233                 SVM_IOIO_SIZE_SHIFT;
1234         gpa  = svm->nested.ctl.iopm_base_pa + (port / 8);
1235         start_bit = port % 8;
1236         iopm_len = (start_bit + size > 8) ? 2 : 1;
1237         mask = (0xf >> (4 - size)) << start_bit;
1238         val = 0;
1239
1240         if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
1241                 return NESTED_EXIT_DONE;
1242
1243         return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
1244 }
1245
1246 static int nested_svm_intercept(struct vcpu_svm *svm)
1247 {
1248         u32 exit_code = svm->vmcb->control.exit_code;
1249         int vmexit = NESTED_EXIT_HOST;
1250
1251         switch (exit_code) {
1252         case SVM_EXIT_MSR:
1253                 vmexit = nested_svm_exit_handled_msr(svm);
1254                 break;
1255         case SVM_EXIT_IOIO:
1256                 vmexit = nested_svm_intercept_ioio(svm);
1257                 break;
1258         case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
1259                 if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
1260                         vmexit = NESTED_EXIT_DONE;
1261                 break;
1262         }
1263         case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
1264                 if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
1265                         vmexit = NESTED_EXIT_DONE;
1266                 break;
1267         }
1268         case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
1269                 /*
1270                  * Host-intercepted exceptions have been checked already in
1271                  * nested_svm_exit_special.  There is nothing to do here;
1272                  * the vmexit is injected by svm_check_nested_events.
1273                  */
1274                 vmexit = NESTED_EXIT_DONE;
1275                 break;
1276         }
1277         case SVM_EXIT_ERR: {
1278                 vmexit = NESTED_EXIT_DONE;
1279                 break;
1280         }
1281         default: {
1282                 if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
1283                         vmexit = NESTED_EXIT_DONE;
1284         }
1285         }
1286
1287         return vmexit;
1288 }
1289
1290 int nested_svm_exit_handled(struct vcpu_svm *svm)
1291 {
1292         int vmexit;
1293
1294         vmexit = nested_svm_intercept(svm);
1295
1296         if (vmexit == NESTED_EXIT_DONE)
1297                 nested_svm_vmexit(svm);
1298
1299         return vmexit;
1300 }
1301
1302 int nested_svm_check_permissions(struct kvm_vcpu *vcpu)
1303 {
1304         if (!(vcpu->arch.efer & EFER_SVME) || !is_paging(vcpu)) {
1305                 kvm_queue_exception(vcpu, UD_VECTOR);
1306                 return 1;
1307         }
1308
1309         if (to_svm(vcpu)->vmcb->save.cpl) {
1310                 kvm_inject_gp(vcpu, 0);
1311                 return 1;
1312         }
1313
1314         return 0;
1315 }
1316
1317 static bool nested_svm_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector,
1318                                            u32 error_code)
1319 {
1320         struct vcpu_svm *svm = to_svm(vcpu);
1321
1322         return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(vector));
1323 }
1324
1325 static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu)
1326 {
1327         struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit;
1328         struct vcpu_svm *svm = to_svm(vcpu);
1329         struct vmcb *vmcb = svm->vmcb;
1330
1331         vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + ex->vector;
1332         vmcb->control.exit_code_hi = 0;
1333
1334         if (ex->has_error_code)
1335                 vmcb->control.exit_info_1 = ex->error_code;
1336
1337         /*
1338          * EXITINFO2 is undefined for all exception intercepts other
1339          * than #PF.
1340          */
1341         if (ex->vector == PF_VECTOR) {
1342                 if (ex->has_payload)
1343                         vmcb->control.exit_info_2 = ex->payload;
1344                 else
1345                         vmcb->control.exit_info_2 = vcpu->arch.cr2;
1346         } else if (ex->vector == DB_VECTOR) {
1347                 /* See kvm_check_and_inject_events().  */
1348                 kvm_deliver_exception_payload(vcpu, ex);
1349
1350                 if (vcpu->arch.dr7 & DR7_GD) {
1351                         vcpu->arch.dr7 &= ~DR7_GD;
1352                         kvm_update_dr7(vcpu);
1353                 }
1354         } else {
1355                 WARN_ON(ex->has_payload);
1356         }
1357
1358         nested_svm_vmexit(svm);
1359 }
1360
1361 static inline bool nested_exit_on_init(struct vcpu_svm *svm)
1362 {
1363         return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
1364 }
1365
1366 static int svm_check_nested_events(struct kvm_vcpu *vcpu)
1367 {
1368         struct kvm_lapic *apic = vcpu->arch.apic;
1369         struct vcpu_svm *svm = to_svm(vcpu);
1370         /*
1371          * Only a pending nested run blocks a pending exception.  If there is a
1372          * previously injected event, the pending exception occurred while said
1373          * event was being delivered and thus needs to be handled.
1374          */
1375         bool block_nested_exceptions = svm->nested.nested_run_pending;
1376         /*
1377          * New events (not exceptions) are only recognized at instruction
1378          * boundaries.  If an event needs reinjection, then KVM is handling a
1379          * VM-Exit that occurred _during_ instruction execution; new events are
1380          * blocked until the instruction completes.
1381          */
1382         bool block_nested_events = block_nested_exceptions ||
1383                                    kvm_event_needs_reinjection(vcpu);
1384
1385         if (lapic_in_kernel(vcpu) &&
1386             test_bit(KVM_APIC_INIT, &apic->pending_events)) {
1387                 if (block_nested_events)
1388                         return -EBUSY;
1389                 if (!nested_exit_on_init(svm))
1390                         return 0;
1391                 nested_svm_simple_vmexit(svm, SVM_EXIT_INIT);
1392                 return 0;
1393         }
1394
1395         if (vcpu->arch.exception_vmexit.pending) {
1396                 if (block_nested_exceptions)
1397                         return -EBUSY;
1398                 nested_svm_inject_exception_vmexit(vcpu);
1399                 return 0;
1400         }
1401
1402         if (vcpu->arch.exception.pending) {
1403                 if (block_nested_exceptions)
1404                         return -EBUSY;
1405                 return 0;
1406         }
1407
1408 #ifdef CONFIG_KVM_SMM
1409         if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) {
1410                 if (block_nested_events)
1411                         return -EBUSY;
1412                 if (!nested_exit_on_smi(svm))
1413                         return 0;
1414                 nested_svm_simple_vmexit(svm, SVM_EXIT_SMI);
1415                 return 0;
1416         }
1417 #endif
1418
1419         if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) {
1420                 if (block_nested_events)
1421                         return -EBUSY;
1422                 if (!nested_exit_on_nmi(svm))
1423                         return 0;
1424                 nested_svm_simple_vmexit(svm, SVM_EXIT_NMI);
1425                 return 0;
1426         }
1427
1428         if (kvm_cpu_has_interrupt(vcpu) && !svm_interrupt_blocked(vcpu)) {
1429                 if (block_nested_events)
1430                         return -EBUSY;
1431                 if (!nested_exit_on_intr(svm))
1432                         return 0;
1433                 trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
1434                 nested_svm_simple_vmexit(svm, SVM_EXIT_INTR);
1435                 return 0;
1436         }
1437
1438         return 0;
1439 }
1440
1441 int nested_svm_exit_special(struct vcpu_svm *svm)
1442 {
1443         u32 exit_code = svm->vmcb->control.exit_code;
1444         struct kvm_vcpu *vcpu = &svm->vcpu;
1445
1446         switch (exit_code) {
1447         case SVM_EXIT_INTR:
1448         case SVM_EXIT_NMI:
1449         case SVM_EXIT_NPF:
1450                 return NESTED_EXIT_HOST;
1451         case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
1452                 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
1453
1454                 if (svm->vmcb01.ptr->control.intercepts[INTERCEPT_EXCEPTION] &
1455                     excp_bits)
1456                         return NESTED_EXIT_HOST;
1457                 else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR &&
1458                          svm->vcpu.arch.apf.host_apf_flags)
1459                         /* Trap async PF even if not shadowing */
1460                         return NESTED_EXIT_HOST;
1461                 break;
1462         }
1463         case SVM_EXIT_VMMCALL:
1464                 /* Hyper-V L2 TLB flush hypercall is handled by L0 */
1465                 if (guest_hv_cpuid_has_l2_tlb_flush(vcpu) &&
1466                     nested_svm_l2_tlb_flush_enabled(vcpu) &&
1467                     kvm_hv_is_tlb_flush_hcall(vcpu))
1468                         return NESTED_EXIT_HOST;
1469                 break;
1470         default:
1471                 break;
1472         }
1473
1474         return NESTED_EXIT_CONTINUE;
1475 }
1476
1477 void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu)
1478 {
1479         struct vcpu_svm *svm = to_svm(vcpu);
1480
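             /*
              * The TSC ratio in effect while L2 runs is L1's scaling ratio
              * combined (fixed-point multiply) with the value L1 wrote to the
              * TSC ratio MSR.
              */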
1481         vcpu->arch.tsc_scaling_ratio =
1482                 kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio,
1483                                                svm->tsc_ratio_msr);
1484         __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
1485 }
1486
1487 /* Inverse operation of nested_copy_vmcb_control_to_cache(); the ASID is copied too. */
1488 static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
1489                                               struct vmcb_ctrl_area_cached *from)
1490 {
1491         unsigned int i;
1492
1493         memset(dst, 0, sizeof(struct vmcb_control_area));
1494
1495         for (i = 0; i < MAX_INTERCEPT; i++)
1496                 dst->intercepts[i] = from->intercepts[i];
1497
1498         dst->iopm_base_pa         = from->iopm_base_pa;
1499         dst->msrpm_base_pa        = from->msrpm_base_pa;
1500         dst->tsc_offset           = from->tsc_offset;
1501         dst->asid                 = from->asid;
1502         dst->tlb_ctl              = from->tlb_ctl;
1503         dst->int_ctl              = from->int_ctl;
1504         dst->int_vector           = from->int_vector;
1505         dst->int_state            = from->int_state;
1506         dst->exit_code            = from->exit_code;
1507         dst->exit_code_hi         = from->exit_code_hi;
1508         dst->exit_info_1          = from->exit_info_1;
1509         dst->exit_info_2          = from->exit_info_2;
1510         dst->exit_int_info        = from->exit_int_info;
1511         dst->exit_int_info_err    = from->exit_int_info_err;
1512         dst->nested_ctl           = from->nested_ctl;
1513         dst->event_inj            = from->event_inj;
1514         dst->event_inj_err        = from->event_inj_err;
1515         dst->next_rip             = from->next_rip;
1516         dst->nested_cr3           = from->nested_cr3;
1517         dst->virt_ext             = from->virt_ext;
1518         dst->pause_filter_count   = from->pause_filter_count;
1519         dst->pause_filter_thresh  = from->pause_filter_thresh;
1520         /* 'clean' and 'hv_enlightenments' are not changed by KVM */
1521 }
1522
1523 static int svm_get_nested_state(struct kvm_vcpu *vcpu,
1524                                 struct kvm_nested_state __user *user_kvm_nested_state,
1525                                 u32 user_data_size)
1526 {
1527         struct vcpu_svm *svm;
1528         struct vmcb_control_area *ctl;
1529         unsigned long r;
1530         struct kvm_nested_state kvm_state = {
1531                 .flags = 0,
1532                 .format = KVM_STATE_NESTED_FORMAT_SVM,
1533                 .size = sizeof(kvm_state),
1534         };
1535         struct vmcb __user *user_vmcb = (struct vmcb __user *)
1536                 &user_kvm_nested_state->data.svm[0];
1537
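             /*
              * A NULL vcpu is a query for the maximum amount of state this
              * ioctl can produce, e.g. to answer KVM_CAP_NESTED_STATE.
              */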
1538         if (!vcpu)
1539                 return kvm_state.size + KVM_STATE_NESTED_SVM_VMCB_SIZE;
1540
1541         svm = to_svm(vcpu);
1542
1543         if (user_data_size < kvm_state.size)
1544                 goto out;
1545
1546         /* First fill in the header and copy it out.  */
1547         if (is_guest_mode(vcpu)) {
1548                 kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb12_gpa;
1549                 kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE;
1550                 kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
1551
1552                 if (svm->nested.nested_run_pending)
1553                         kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
1554         }
1555
1556         if (gif_set(svm))
1557                 kvm_state.flags |= KVM_STATE_NESTED_GIF_SET;
1558
1559         if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
1560                 return -EFAULT;
1561
1562         if (!is_guest_mode(vcpu))
1563                 goto out;
1564
1565         /*
1566          * Copy over the full size of the VMCB rather than just the size
1567          * of the structs.
1568          */
1569         if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE))
1570                 return -EFAULT;
1571
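             /*
              * The control area is cached in KVM's internal format; convert it
              * back to the architectural layout before copying it to userspace.
              */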
1572         ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
1573         if (!ctl)
1574                 return -ENOMEM;
1575
1576         nested_copy_vmcb_cache_to_control(ctl, &svm->nested.ctl);
1577         r = copy_to_user(&user_vmcb->control, ctl,
1578                          sizeof(user_vmcb->control));
1579         kfree(ctl);
1580         if (r)
1581                 return -EFAULT;
1582
1583         if (copy_to_user(&user_vmcb->save, &svm->vmcb01.ptr->save,
1584                          sizeof(user_vmcb->save)))
1585                 return -EFAULT;
1586 out:
1587         return kvm_state.size;
1588 }
1589
1590 static int svm_set_nested_state(struct kvm_vcpu *vcpu,
1591                                 struct kvm_nested_state __user *user_kvm_nested_state,
1592                                 struct kvm_nested_state *kvm_state)
1593 {
1594         struct vcpu_svm *svm = to_svm(vcpu);
1595         struct vmcb __user *user_vmcb = (struct vmcb __user *)
1596                 &user_kvm_nested_state->data.svm[0];
1597         struct vmcb_control_area *ctl;
1598         struct vmcb_save_area *save;
1599         struct vmcb_save_area_cached save_cached;
1600         struct vmcb_ctrl_area_cached ctl_cached;
1601         unsigned long cr0;
1602         int ret;
1603
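             /* Both the control and save areas must fit in the fixed-size blob. */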
1604         BUILD_BUG_ON(sizeof(struct vmcb_control_area) + sizeof(struct vmcb_save_area) >
1605                      KVM_STATE_NESTED_SVM_VMCB_SIZE);
1606
1607         if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM)
1608                 return -EINVAL;
1609
1610         if (kvm_state->flags & ~(KVM_STATE_NESTED_GUEST_MODE |
1611                                  KVM_STATE_NESTED_RUN_PENDING |
1612                                  KVM_STATE_NESTED_GIF_SET))
1613                 return -EINVAL;
1614
1615         /*
1616          * If in guest mode, vcpu->arch.efer actually refers to the L2 guest's
1617          * EFER.SVME, but EFER.SVME still has to be 1 for VMRUN to succeed.
1618          */
1619         if (!(vcpu->arch.efer & EFER_SVME)) {
1620                 /* GIF=1 and no guest mode are required if SVME=0.  */
1621                 if (kvm_state->flags != KVM_STATE_NESTED_GIF_SET)
1622                         return -EINVAL;
1623         }
1624
1625         /* SMM temporarily disables SVM, so we cannot be in guest mode.  */
1626         if (is_smm(vcpu) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
1627                 return -EINVAL;
1628
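             /*
              * If userspace isn't putting the vCPU in guest mode, drop any
              * stale nested state and only propagate GIF.
              */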
1629         if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
1630                 svm_leave_nested(vcpu);
1631                 svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
1632                 return 0;
1633         }
1634
1635         if (!page_address_valid(vcpu, kvm_state->hdr.svm.vmcb_pa))
1636                 return -EINVAL;
1637         if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE)
1638                 return -EINVAL;
1639
1640         ret  = -ENOMEM;
1641         ctl  = kzalloc(sizeof(*ctl),  GFP_KERNEL_ACCOUNT);
1642         save = kzalloc(sizeof(*save), GFP_KERNEL_ACCOUNT);
1643         if (!ctl || !save)
1644                 goto out_free;
1645
1646         ret = -EFAULT;
1647         if (copy_from_user(ctl, &user_vmcb->control, sizeof(*ctl)))
1648                 goto out_free;
1649         if (copy_from_user(save, &user_vmcb->save, sizeof(*save)))
1650                 goto out_free;
1651
1652         ret = -EINVAL;
1653         __nested_copy_vmcb_control_to_cache(vcpu, &ctl_cached, ctl);
1654         if (!__nested_vmcb_check_controls(vcpu, &ctl_cached))
1655                 goto out_free;
1656
1657         /*
1658          * Processor state contains L2 state.  Check that it is
1659          * valid for guest mode (see nested_vmcb_check_save).
1660          */
1661         cr0 = kvm_read_cr0(vcpu);
1662         if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
1663                 goto out_free;
1664
1665         /*
1666          * Validate host state saved from before VMRUN (see
1667          * nested_svm_check_permissions).
1668          */
1669         __nested_copy_vmcb_save_to_cache(&save_cached, save);
1670         if (!(save->cr0 & X86_CR0_PG) ||
1671             !(save->cr0 & X86_CR0_PE) ||
1672             (save->rflags & X86_EFLAGS_VM) ||
1673             !__nested_vmcb_check_save(vcpu, &save_cached))
1674                 goto out_free;
1675
1676
1677         /*
1678          * All checks done, we can enter guest mode. Userspace provides
1679          * vmcb12.control, which will be combined with L1 and stored into
1680          * vmcb02, and the L1 save state, which we store in vmcb01.
1681          * L2 registers, if needed, are moved from the current VMCB to vmcb02.
1682          */
1683
1684         if (is_guest_mode(vcpu))
1685                 svm_leave_nested(vcpu);
1686         else
1687                 svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
1688
1689         svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
1690
1691         svm->nested.nested_run_pending =
1692                 !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
1693
1694         svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
1695
1696         svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
1697         nested_copy_vmcb_control_to_cache(svm, ctl);
1698
1699         svm_switch_vmcb(svm, &svm->nested.vmcb02);
1700         nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base);
1701
1702         /*
1703          * While the nested guest CR3 is already checked and set by
1704          * KVM_SET_SREGS, it was set before the nested state was loaded, so
1705          * the MMU might not have been initialized correctly.
1706          * Set it again to fix this.
1707          */
1708
1709         ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
1710                                   nested_npt_enabled(svm), false);
1711         if (WARN_ON_ONCE(ret))
1712                 goto out_free;
1713
1714         svm->nested.force_msr_bitmap_recalc = true;
1715
1716         kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
1717         ret = 0;
1718 out_free:
1719         kfree(save);
1720         kfree(ctl);
1721
1722         return ret;
1723 }
1724
1725 static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
1726 {
1727         struct vcpu_svm *svm = to_svm(vcpu);
1728
1729         if (WARN_ON(!is_guest_mode(vcpu)))
1730                 return true;
1731
1732         if (!vcpu->arch.pdptrs_from_userspace &&
1733             !nested_npt_enabled(svm) && is_pae_paging(vcpu))
1734                 /*
1735                  * Reload the guest's PDPTRs: after a migration, the guest
1736                  * CR3 might be restored prior to setting the nested state,
1737                  * which can lead to loading the wrong PDPTRs.
1738                  */
1739                 if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
1740                         return false;
1741
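             /*
              * Merge the L1 (vmcb12) MSR permission bitmap into the one used
              * for L2; failure (e.g. the vmcb12 MSRPM isn't backed by memory)
              * is reported to userspace as an emulation error.
              */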
1742         if (!nested_svm_vmrun_msrpm(svm)) {
1743                 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1744                 vcpu->run->internal.suberror =
1745                         KVM_INTERNAL_ERROR_EMULATION;
1746                 vcpu->run->internal.ndata = 0;
1747                 return false;
1748         }
1749
1750         if (kvm_hv_verify_vp_assist(vcpu))
1751                 return false;
1752
1753         return true;
1754 }
1755
1756 struct kvm_x86_nested_ops svm_nested_ops = {
1757         .leave_nested = svm_leave_nested,
1758         .is_exception_vmexit = nested_svm_is_exception_vmexit,
1759         .check_events = svm_check_nested_events,
1760         .triple_fault = nested_svm_triple_fault,
1761         .get_nested_state_pages = svm_get_nested_state_pages,
1762         .get_state = svm_get_nested_state,
1763         .set_state = svm_set_nested_state,
1764         .hv_inject_synthetic_vmexit_post_tlb_flush = svm_hv_inject_synthetic_vmexit_post_tlb_flush,
1765 };