KVM: SVM: Add initial support for a VMGEXIT VMEXIT
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 3c9a45e..0244f4f 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
 #include <linux/psp-sev.h>
 #include <linux/pagemap.h>
 #include <linux/swap.h>
+#include <linux/processor.h>
 
 #include "x86.h"
 #include "svm.h"
+#include "cpuid.h"
 
 static int sev_flush_asids(void);
 static DECLARE_RWSEM(sev_deactivate_lock);
@@ -447,10 +449,8 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
        }
 
        /*
-        * The LAUNCH_UPDATE command will perform in-place encryption of the
-        * memory content (i.e it will write the same memory region with C=1).
-        * It's possible that the cache may contain the data with C=0, i.e.,
-        * unencrypted so invalidate it first.
+        * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
+        * place; the cache may contain the data that was written unencrypted.
         */
        sev_clflush_pages(inpages, npages);
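For context on the flush comment above: sev_clflush_pages() already lives earlier in this file (outside the hunks shown). Assuming it matches the current upstream helper, it is roughly the sketch below: a no-op when the CPU keeps encrypted and unencrypted mappings of a page coherent, otherwise a CLFLUSH of every pinned page.

	static void sev_clflush_pages(struct page *pages[], unsigned long npages)
	{
		unsigned long i;
		uint8_t *page_virtual;

		/* Coherent hardware never sees stale C=0 cache lines. */
		if (this_cpu_has(X86_FEATURE_SME_COHERENT) || !npages || !pages)
			return;

		for (i = 0; i < npages; i++) {
			/* Flush the cache lines of each pinned guest page. */
			page_virtual = kmap_atomic(pages[i]);
			clflush_cache_range(page_virtual, PAGE_SIZE);
			kunmap_atomic(page_virtual);
		}
	}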
 
@@ -806,10 +806,9 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
                }
 
                /*
-                * The DBG_{DE,EN}CRYPT commands will perform {dec,en}cryption of the
-                * memory content (i.e it will write the same memory region with C=1).
-                * It's possible that the cache may contain the data with C=0, i.e.,
-                * unencrypted so invalidate it first.
+                * Flush (on non-coherent CPUs) before DBG_{DE,EN}CRYPT read or modify
+                * the pages; flush the destination too so that future accesses do not
+                * see stale data.
                 */
                sev_clflush_pages(src_p, 1);
                sev_clflush_pages(dst_p, 1);
@@ -857,7 +856,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
        struct kvm_sev_launch_secret params;
        struct page **pages;
        void *blob, *hdr;
-       unsigned long n;
+       unsigned long n, i;
        int ret, offset;
 
        if (!sev_guest(kvm))
@@ -870,6 +869,12 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
        if (IS_ERR(pages))
                return PTR_ERR(pages);
 
+       /*
+        * Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in
+        * place; the cache may contain the data that was written unencrypted.
+        */
+       sev_clflush_pages(pages, n);
+
        /*
         * The secret must be copied into a contiguous memory region, so
         * verify that the userspace memory pages are contiguous before
         * issuing the command.
@@ -915,6 +920,11 @@ e_free_blob:
 e_free:
        kfree(data);
 e_unpin_memory:
+       /* Content of memory was updated; mark the pages dirty and accessed. */
+       for (i = 0; i < n; i++) {
+               set_page_dirty_lock(pages[i]);
+               mark_page_accessed(pages[i]);
+       }
        sev_unpin_memory(kvm, pages, n);
        return ret;
 }
@@ -924,7 +934,7 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
        struct kvm_sev_cmd sev_cmd;
        int r;
 
-       if (!svm_sev_enabled())
+       if (!svm_sev_enabled() || !sev)
                return -ENOTTY;
 
        if (!argp)
@@ -1117,49 +1127,58 @@ void sev_vm_destroy(struct kvm *kvm)
        sev_asid_free(sev->asid);
 }
 
-int __init sev_hardware_setup(void)
+void __init sev_hardware_setup(void)
 {
-       struct sev_user_data_status *status;
-       int rc;
+       unsigned int eax, ebx, ecx, edx;
+       bool sev_es_supported = false;
+       bool sev_supported = false;
+
+       /* Does the CPU support SEV? */
+       if (!boot_cpu_has(X86_FEATURE_SEV))
+               goto out;
+
+       /* Retrieve SEV CPUID information */
+       cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
 
        /* Maximum number of encrypted guests supported simultaneously */
-       max_sev_asid = cpuid_ecx(0x8000001F);
+       max_sev_asid = ecx;
 
        if (!svm_sev_enabled())
-               return 1;
+               goto out;
 
        /* Minimum ASID value that should be used for SEV guest */
-       min_sev_asid = cpuid_edx(0x8000001F);
+       min_sev_asid = edx;
 
        /* Initialize SEV ASID bitmaps */
        sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
        if (!sev_asid_bitmap)
-               return 1;
+               goto out;
 
        sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
        if (!sev_reclaim_asid_bitmap)
-               return 1;
+               goto out;
 
-       status = kmalloc(sizeof(*status), GFP_KERNEL);
-       if (!status)
-               return 1;
+       pr_info("SEV supported: %u ASIDs\n", max_sev_asid - min_sev_asid + 1);
+       sev_supported = true;
 
-       /*
-        * Check SEV platform status.
-        *
-        * PLATFORM_STATUS can be called in any state, if we failed to query
-        * the PLATFORM status then either PSP firmware does not support SEV
-        * feature or SEV firmware is dead.
-        */
-       rc = sev_platform_status(status, NULL);
-       if (rc)
-               goto err;
+       /* SEV-ES support requested? */
+       if (!sev_es)
+               goto out;
+
+       /* Does the CPU support SEV-ES? */
+       if (!boot_cpu_has(X86_FEATURE_SEV_ES))
+               goto out;
 
-       pr_info("SEV supported\n");
+       /* Has the system been allocated ASIDs for SEV-ES? */
+       if (min_sev_asid == 1)
+               goto out;
 
-err:
-       kfree(status);
-       return rc;
+       pr_info("SEV-ES supported: %u ASIDs\n", min_sev_asid - 1);
+       sev_es_supported = true;
+
+out:
+       sev = sev_supported;
+       sev_es = sev_es_supported;
 }
 
 void sev_hardware_teardown(void)
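For reference, CPUID Fn8000_001F supplies the values used above: ECX reports the number of encrypted guests supported simultaneously (the maximum SEV ASID) and EDX the minimum ASID usable by an SEV guest that is not running SEV-ES. ASIDs 1 through min_sev_asid - 1 are therefore the SEV-ES pool, which is why min_sev_asid == 1 means no SEV-ES ASIDs were provisioned. The hypothetical helpers below are not part of the patch; they only restate the two counts printed by the pr_info() calls:

	/* Hypothetical helpers illustrating the ASID partitioning (sketch only). */
	static unsigned int sev_asid_count(void)
	{
		/* ASIDs min_sev_asid..max_sev_asid run plain SEV guests. */
		return max_sev_asid - min_sev_asid + 1;
	}

	static unsigned int sev_es_asid_count(void)
	{
		/* ASIDs 1..min_sev_asid-1 are reserved for SEV-ES guests. */
		return min_sev_asid - 1;
	}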
@@ -1173,13 +1192,294 @@ void sev_hardware_teardown(void)
        sev_flush_asids();
 }
 
+/*
+ * Pages used by hardware to hold guest encrypted state must be flushed before
+ * returning them to the system.
+ */
+static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
+                                  unsigned long len)
+{
+       /*
+        * If hardware-enforced cache coherency for encrypted mappings of the
+        * same physical page is supported, there is nothing to do.
+        */
+       if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
+               return;
+
+       /*
+        * If the VM Page Flush MSR is supported, use it to flush the page
+        * (using the page virtual address and the guest ASID).
+        */
+       if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
+               struct kvm_sev_info *sev;
+               unsigned long va_start;
+               u64 start, stop;
+
+               /* Align start and stop to page boundaries. */
+               va_start = (unsigned long)va;
+               start = (u64)va_start & PAGE_MASK;
+               stop = PAGE_ALIGN((u64)va_start + len);
+
+               if (start < stop) {
+                       sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+
+                       while (start < stop) {
+                               wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
+                                      start | sev->asid);
+
+                               start += PAGE_SIZE;
+                       }
+
+                       return;
+               }
+
+               WARN(1, "Address overflow, using WBINVD\n");
+       }
+
+       /*
+        * Hardware should always have one of the above features,
+        * but if not, use WBINVD and issue a warning.
+        */
+       WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
+       wbinvd_on_all_cpus();
+}
+
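As a worked example of the MSR write in sev_flush_guest_memory() above (sketch, not part of the patch): the VM_PAGE_FLUSH payload packs the page-aligned virtual address of the page into bits 63:12 and the guest's ASID into bits 11:0, so flushing a single VMSA page for, say, ASID 5 would be:

	u64 va = (u64)svm->vmsa & PAGE_MASK;		/* page-aligned VA   */

	wrmsrl(MSR_AMD64_VM_PAGE_FLUSH, va | 5);	/* ASID in bits 11:0 */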
+void sev_free_vcpu(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_svm *svm;
+
+       if (!sev_es_guest(vcpu->kvm))
+               return;
+
+       svm = to_svm(vcpu);
+
+       if (vcpu->arch.guest_state_protected)
+               sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
+       __free_page(virt_to_page(svm->vmsa));
+}
+
+static void dump_ghcb(struct vcpu_svm *svm)
+{
+       struct ghcb *ghcb = svm->ghcb;
+       unsigned int nbits;
+
+       /* Re-use the dump_invalid_vmcb module parameter */
+       if (!dump_invalid_vmcb) {
+               pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
+               return;
+       }
+
+       nbits = sizeof(ghcb->save.valid_bitmap) * 8;
+
+       pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
+       pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
+              ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
+       pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
+              ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
+       pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
+              ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
+       pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
+              ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
+       pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
+}
+
+static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
+{
+       struct kvm_vcpu *vcpu = &svm->vcpu;
+       struct ghcb *ghcb = svm->ghcb;
+
+       /*
+        * The GHCB protocol so far allows for the following data
+        * to be returned:
+        *   GPRs RAX, RBX, RCX, RDX
+        *
+        * Copy their values to the GHCB if they are dirty.
+        */
+       if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX))
+               ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
+       if (kvm_register_is_dirty(vcpu, VCPU_REGS_RBX))
+               ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
+       if (kvm_register_is_dirty(vcpu, VCPU_REGS_RCX))
+               ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
+       if (kvm_register_is_dirty(vcpu, VCPU_REGS_RDX))
+               ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
+}
+
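The ghcb_set_*() and ghcb_*_is_valid() helpers used throughout this file are generated by accessor macros in arch/x86/include/asm/svm.h. A simplified sketch of one such pair (not the exact macro expansion): the setter stores the value and flags the field in the GHCB valid bitmap, and the validity check tests that same bit, with the bit index derived from the field's u64 offset in the save area.

	/* Simplified sketch of the rax accessor pair (normally macro-generated);
	 * GHCB_BITMAP_IDX() in svm.h maps a field to its bit in the valid bitmap. */
	static inline void ghcb_set_rax(struct ghcb *ghcb, u64 value)
	{
		__set_bit(GHCB_BITMAP_IDX(rax),
			  (unsigned long *)&ghcb->save.valid_bitmap);
		ghcb->save.rax = value;
	}

	static inline bool ghcb_rax_is_valid(const struct ghcb *ghcb)
	{
		return test_bit(GHCB_BITMAP_IDX(rax),
				(unsigned long *)&ghcb->save.valid_bitmap);
	}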
+static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
+{
+       struct vmcb_control_area *control = &svm->vmcb->control;
+       struct kvm_vcpu *vcpu = &svm->vcpu;
+       struct ghcb *ghcb = svm->ghcb;
+       u64 exit_code;
+
+       /*
+        * The GHCB protocol so far allows for the following data
+        * to be supplied:
+        *   GPRs RAX, RBX, RCX, RDX
+        *   XCR0
+        *   CPL
+        *
+        * VMMCALL allows the guest to provide extra registers. KVM also
+        * expects RSI for hypercalls, so include that, too.
+        *
+        * Copy their values to the appropriate location if supplied.
+        */
+       memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
+
+       vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
+       vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
+       vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
+       vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
+       vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);
+
+       svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);
+
+       if (ghcb_xcr0_is_valid(ghcb)) {
+               vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
+               kvm_update_cpuid_runtime(vcpu);
+       }
+
+       /* Copy the GHCB exit information into the VMCB fields */
+       exit_code = ghcb_get_sw_exit_code(ghcb);
+       control->exit_code = lower_32_bits(exit_code);
+       control->exit_code_hi = upper_32_bits(exit_code);
+       control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
+       control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);
+
+       /* Clear the valid bitmap so stale entries are not carried over */
+       memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
+}
+
+static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
+{
+       struct kvm_vcpu *vcpu;
+       struct ghcb *ghcb;
+       u64 exit_code = 0;
+
+       ghcb = svm->ghcb;
+
+       /* Only GHCB Usage code 0 is supported */
+       if (ghcb->ghcb_usage)
+               goto vmgexit_err;
+
+       /*
+        * Retrieve the exit code now even though it may not be marked valid
+        * as it could help with debugging.
+        */
+       exit_code = ghcb_get_sw_exit_code(ghcb);
+
+       if (!ghcb_sw_exit_code_is_valid(ghcb) ||
+           !ghcb_sw_exit_info_1_is_valid(ghcb) ||
+           !ghcb_sw_exit_info_2_is_valid(ghcb))
+               goto vmgexit_err;
+
+       switch (ghcb_get_sw_exit_code(ghcb)) {
+       case SVM_EXIT_READ_DR7:
+               break;
+       case SVM_EXIT_WRITE_DR7:
+               if (!ghcb_rax_is_valid(ghcb))
+                       goto vmgexit_err;
+               break;
+       case SVM_EXIT_RDTSC:
+               break;
+       case SVM_EXIT_RDPMC:
+               if (!ghcb_rcx_is_valid(ghcb))
+                       goto vmgexit_err;
+               break;
+       case SVM_EXIT_CPUID:
+               if (!ghcb_rax_is_valid(ghcb) ||
+                   !ghcb_rcx_is_valid(ghcb))
+                       goto vmgexit_err;
+               if (ghcb_get_rax(ghcb) == 0xd)
+                       if (!ghcb_xcr0_is_valid(ghcb))
+                               goto vmgexit_err;
+               break;
+       case SVM_EXIT_INVD:
+               break;
+       case SVM_EXIT_IOIO:
+               if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
+                       if (!ghcb_rax_is_valid(ghcb))
+                               goto vmgexit_err;
+               break;
+       case SVM_EXIT_MSR:
+               if (!ghcb_rcx_is_valid(ghcb))
+                       goto vmgexit_err;
+               if (ghcb_get_sw_exit_info_1(ghcb)) {
+                       if (!ghcb_rax_is_valid(ghcb) ||
+                           !ghcb_rdx_is_valid(ghcb))
+                               goto vmgexit_err;
+               }
+               break;
+       case SVM_EXIT_VMMCALL:
+               if (!ghcb_rax_is_valid(ghcb) ||
+                   !ghcb_cpl_is_valid(ghcb))
+                       goto vmgexit_err;
+               break;
+       case SVM_EXIT_RDTSCP:
+               break;
+       case SVM_EXIT_WBINVD:
+               break;
+       case SVM_EXIT_MONITOR:
+               if (!ghcb_rax_is_valid(ghcb) ||
+                   !ghcb_rcx_is_valid(ghcb) ||
+                   !ghcb_rdx_is_valid(ghcb))
+                       goto vmgexit_err;
+               break;
+       case SVM_EXIT_MWAIT:
+               if (!ghcb_rax_is_valid(ghcb) ||
+                   !ghcb_rcx_is_valid(ghcb))
+                       goto vmgexit_err;
+               break;
+       case SVM_VMGEXIT_UNSUPPORTED_EVENT:
+               break;
+       default:
+               goto vmgexit_err;
+       }
+
+       return 0;
+
+vmgexit_err:
+       vcpu = &svm->vcpu;
+
+       if (ghcb->ghcb_usage) {
+               vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
+                           ghcb->ghcb_usage);
+       } else {
+               vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n",
+                           exit_code);
+               dump_ghcb(svm);
+       }
+
+       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+       vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+       vcpu->run->internal.ndata = 2;
+       vcpu->run->internal.data[0] = exit_code;
+       vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
+
+       return -EINVAL;
+}
+
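A malformed VMGEXIT is thus surfaced to userspace as KVM_EXIT_INTERNAL_ERROR, with the offending exit code and the last vmentry CPU in the data array. A VMM could report it roughly like this (hypothetical userspace snippet, not part of the patch):

	/* After ioctl(vcpu_fd, KVM_RUN, 0) returns (hypothetical VMM side). */
	if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR &&
	    run->internal.suberror == KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON)
		fprintf(stderr, "invalid VMGEXIT: exit_code=%#llx, cpu=%llu\n",
			(unsigned long long)run->internal.data[0],
			(unsigned long long)run->internal.data[1]);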
+static void pre_sev_es_run(struct vcpu_svm *svm)
+{
+       if (!svm->ghcb)
+               return;
+
+       sev_es_sync_to_ghcb(svm);
+
+       kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
+       svm->ghcb = NULL;
+}
+
 void pre_sev_run(struct vcpu_svm *svm, int cpu)
 {
        struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
        int asid = sev_get_asid(svm->vcpu.kvm);
 
+       /* Perform any SEV-ES pre-run actions */
+       pre_sev_es_run(svm);
+
        /* Assign the asid allocated with this SEV guest */
-       svm->vmcb->control.asid = asid;
+       svm->asid = asid;
 
        /*
         * Flush guest TLB:
@@ -1195,3 +1495,59 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
        svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
        vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
 }
+
+static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
+{
+       return -EINVAL;
+}
+
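sev_handle_vmgexit_msr_protocol() is only stubbed out here. For context: when the guest uses the MSR-based GHCB protocol, the low 12 bits of the GHCB MSR value (GHCB_MSR_INFO_MASK) carry a request code defined by the GHCB specification, and a follow-on patch is expected to dispatch on it roughly as sketched below (the request-code constants are named after the spec's functions and are assumptions here):

	/* Sketch only: dispatch on the info field in the low 12 bits. */
	u64 ghcb_info = svm->vmcb->control.ghcb_gpa & GHCB_MSR_INFO_MASK;

	switch (ghcb_info) {
	case GHCB_MSR_SEV_INFO_REQ:
		/* Report the supported GHCB protocol version range. */
		break;
	case GHCB_MSR_CPUID_REQ:
		/* Emulate a single CPUID leaf through the MSR protocol. */
		break;
	default:
		return -EINVAL;
	}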
+int sev_handle_vmgexit(struct vcpu_svm *svm)
+{
+       struct vmcb_control_area *control = &svm->vmcb->control;
+       u64 ghcb_gpa, exit_code;
+       struct ghcb *ghcb;
+       int ret;
+
+       /* Validate the GHCB */
+       ghcb_gpa = control->ghcb_gpa;
+       if (ghcb_gpa & GHCB_MSR_INFO_MASK)
+               return sev_handle_vmgexit_msr_protocol(svm);
+
+       if (!ghcb_gpa) {
+               vcpu_unimpl(&svm->vcpu, "vmgexit: GHCB gpa is not set\n");
+               return -EINVAL;
+       }
+
+       if (kvm_vcpu_map(&svm->vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
+               /* Unable to map GHCB from guest */
+               vcpu_unimpl(&svm->vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
+                           ghcb_gpa);
+               return -EINVAL;
+       }
+
+       svm->ghcb = svm->ghcb_map.hva;
+       ghcb = svm->ghcb_map.hva;
+
+       exit_code = ghcb_get_sw_exit_code(ghcb);
+
+       ret = sev_es_validate_vmgexit(svm);
+       if (ret)
+               return ret;
+
+       sev_es_sync_from_ghcb(svm);
+       ghcb_set_sw_exit_info_1(ghcb, 0);
+       ghcb_set_sw_exit_info_2(ghcb, 0);
+
+       ret = -EINVAL;
+       switch (exit_code) {
+       case SVM_VMGEXIT_UNSUPPORTED_EVENT:
+               vcpu_unimpl(&svm->vcpu,
+                           "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
+                           control->exit_info_1, control->exit_info_2);
+               break;
+       default:
+               ret = svm_invoke_exit_handler(svm, exit_code);
+       }
+
+       return ret;
+}
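Not visible in this file's diff: the new exit code is routed here from the common SVM exit dispatch, so svm.c is expected to gain a matching handler-table entry along these lines (sketch of the corresponding hunk):

	static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
		/* ... existing exit-code entries ... */
		[SVM_EXIT_VMGEXIT]		= sev_handle_vmgexit,
	};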