1 // SPDX-License-Identifier: GPL-2.0-only
3 * Kernel-based Virtual Machine driver for Linux
7 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
10 #include <linux/kvm_types.h>
11 #include <linux/kvm_host.h>
12 #include <linux/kernel.h>
13 #include <linux/highmem.h>
14 #include <linux/psp-sev.h>
15 #include <linux/pagemap.h>
16 #include <linux/swap.h>
17 #include <linux/processor.h>
23 static int sev_flush_asids(void);
24 static DECLARE_RWSEM(sev_deactivate_lock);
25 static DEFINE_MUTEX(sev_bitmap_lock);
26 unsigned int max_sev_asid;
27 static unsigned int min_sev_asid;
28 static unsigned long *sev_asid_bitmap;
29 static unsigned long *sev_reclaim_asid_bitmap;
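/* Physical address of a pinned page with the SME/SEV encryption bit (C-bit) set, as expected by the SEV firmware commands below. */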
30 #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
33 struct list_head list;
40 static int sev_flush_asids(void)
45 * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail,
46 * so it must be guarded.
48 down_write(&sev_deactivate_lock);
51 ret = sev_guest_df_flush(&error);
53 up_write(&sev_deactivate_lock);
56 pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error);
61 /* Must be called with the sev_bitmap_lock held */
62 static bool __sev_recycle_asids(void)
66 /* Check if there are any ASIDs to reclaim before performing a flush */
67 pos = find_next_bit(sev_reclaim_asid_bitmap,
68 max_sev_asid, min_sev_asid - 1);
69 if (pos >= max_sev_asid)
72 if (sev_flush_asids())
75 bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
77 bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
82 static int sev_asid_new(void)
87 mutex_lock(&sev_bitmap_lock);
90 * An SEV-enabled guest must use an ASID from min_sev_asid to max_sev_asid.
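* (ASID numbering is 1-based while the bitmaps are 0-based: bit N tracks ASID N + 1, hence the min_sev_asid - 1 start index below.)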
93 pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
94 if (pos >= max_sev_asid) {
95 if (retry && __sev_recycle_asids()) {
99 mutex_unlock(&sev_bitmap_lock);
103 __set_bit(pos, sev_asid_bitmap);
105 mutex_unlock(&sev_bitmap_lock);
110 static int sev_get_asid(struct kvm *kvm)
112 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
117 static void sev_asid_free(int asid)
119 struct svm_cpu_data *sd;
122 mutex_lock(&sev_bitmap_lock);
125 __set_bit(pos, sev_reclaim_asid_bitmap);
127 for_each_possible_cpu(cpu) {
128 sd = per_cpu(svm_data, cpu);
129 sd->sev_vmcbs[pos] = NULL;
132 mutex_unlock(&sev_bitmap_lock);
135 static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
137 struct sev_data_decommission *decommission;
138 struct sev_data_deactivate *data;
143 data = kzalloc(sizeof(*data), GFP_KERNEL);
147 /* deactivate handle */
148 data->handle = handle;
150 /* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */
151 down_read(&sev_deactivate_lock);
152 sev_guest_deactivate(data, NULL);
153 up_read(&sev_deactivate_lock);
157 decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
161 /* decommission handle */
162 decommission->handle = handle;
163 sev_guest_decommission(decommission, NULL);
168 static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
170 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
174 if (unlikely(sev->active))
177 asid = sev_asid_new();
181 ret = sev_platform_init(&argp->error);
187 INIT_LIST_HEAD(&sev->regions_list);
196 static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
198 struct sev_data_activate *data;
199 int asid = sev_get_asid(kvm);
202 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
206 /* activate ASID on the given handle */
207 data->handle = handle;
209 ret = sev_guest_activate(data, error);
215 static int __sev_issue_cmd(int fd, int id, void *data, int *error)
224 ret = sev_issue_cmd_external_user(f.file, id, data, error);
230 static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
232 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
234 return __sev_issue_cmd(sev->fd, id, data, error);
237 static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
239 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
240 struct sev_data_launch_start *start;
241 struct kvm_sev_launch_start params;
242 void *dh_blob, *session_blob;
243 int *error = &argp->error;
249 if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
252 start = kzalloc(sizeof(*start), GFP_KERNEL_ACCOUNT);
257 if (params.dh_uaddr) {
258 dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
259 if (IS_ERR(dh_blob)) {
260 ret = PTR_ERR(dh_blob);
264 start->dh_cert_address = __sme_set(__pa(dh_blob));
265 start->dh_cert_len = params.dh_len;
269 if (params.session_uaddr) {
270 session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
271 if (IS_ERR(session_blob)) {
272 ret = PTR_ERR(session_blob);
276 start->session_address = __sme_set(__pa(session_blob));
277 start->session_len = params.session_len;
280 start->handle = params.handle;
281 start->policy = params.policy;
283 /* create memory encryption context */
284 ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
288 /* Bind ASID to this guest */
289 ret = sev_bind_asid(kvm, start->handle, error);
293 /* return handle to userspace */
294 params.handle = start->handle;
295 if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
296 sev_unbind_asid(kvm, start->handle);
301 sev->handle = start->handle;
302 sev->fd = argp->sev_fd;
313 static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
314 unsigned long ulen, unsigned long *n,
317 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
318 unsigned long npages, size;
320 unsigned long locked, lock_limit;
322 unsigned long first, last;
325 if (ulen == 0 || uaddr + ulen < uaddr)
326 return ERR_PTR(-EINVAL);
328 /* Calculate number of pages. */
329 first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
330 last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
331 npages = (last - first + 1);
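/* e.g. with 4K pages, uaddr = 0x1ff0 and ulen = 0x20 straddle a page boundary: first = 1, last = 2, npages = 2. */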
333 locked = sev->pages_locked + npages;
334 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
335 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
336 pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
337 return ERR_PTR(-ENOMEM);
340 if (WARN_ON_ONCE(npages > INT_MAX))
341 return ERR_PTR(-EINVAL);
343 /* Avoid using vmalloc for smaller buffers. */
344 size = npages * sizeof(struct page *);
345 if (size > PAGE_SIZE)
346 pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
348 pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
351 return ERR_PTR(-ENOMEM);
353 /* Pin the user virtual address. */
354 npinned = pin_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
355 if (npinned != npages) {
356 pr_err("SEV: Failure locking %lu pages.\n", npages);
362 sev->pages_locked = locked;
368 unpin_user_pages(pages, npinned);
374 static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
375 unsigned long npages)
377 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
379 unpin_user_pages(pages, npages);
381 sev->pages_locked -= npages;
384 static void sev_clflush_pages(struct page *pages[], unsigned long npages)
386 uint8_t *page_virtual;
389 if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 ||
393 for (i = 0; i < npages; i++) {
394 page_virtual = kmap_atomic(pages[i]);
395 clflush_cache_range(page_virtual, PAGE_SIZE);
396 kunmap_atomic(page_virtual);
400 static unsigned long get_num_contig_pages(unsigned long idx,
401 struct page **inpages, unsigned long npages)
403 unsigned long paddr, next_paddr;
404 unsigned long i = idx + 1, pages = 1;
406 /* find the number of contiguous pages starting from idx */
407 paddr = __sme_page_pa(inpages[idx]);
409 next_paddr = __sme_page_pa(inpages[i++]);
410 if ((paddr + PAGE_SIZE) == next_paddr) {
421 static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
423 unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
424 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
425 struct kvm_sev_launch_update_data params;
426 struct sev_data_launch_update_data *data;
427 struct page **inpages;
433 if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
436 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
440 vaddr = params.uaddr;
442 vaddr_end = vaddr + size;
444 /* Lock the user memory. */
445 inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
446 if (IS_ERR(inpages)) {
447 ret = PTR_ERR(inpages);
452 * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
453 * place; the cache may contain the data that was written unencrypted.
455 sev_clflush_pages(inpages, npages);
457 for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
461 * If the user buffer is not page-aligned, calculate the offset within the page.
464 offset = vaddr & (PAGE_SIZE - 1);
466 /* Calculate the number of pages that can be encrypted in one go. */
467 pages = get_num_contig_pages(i, inpages, npages);
469 len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
471 data->handle = sev->handle;
473 data->address = __sme_page_pa(inpages[i]) + offset;
474 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
479 next_vaddr = vaddr + len;
483 /* content of memory is updated, mark pages dirty */
484 for (i = 0; i < npages; i++) {
485 set_page_dirty_lock(inpages[i]);
486 mark_page_accessed(inpages[i]);
488 /* unlock the user pages */
489 sev_unpin_memory(kvm, inpages, npages);
495 static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
497 void __user *measure = (void __user *)(uintptr_t)argp->data;
498 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
499 struct sev_data_launch_measure *data;
500 struct kvm_sev_launch_measure params;
501 void __user *p = NULL;
508 if (copy_from_user(&params, measure, sizeof(params)))
511 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
515 /* User wants to query the blob length */
519 p = (void __user *)(uintptr_t)params.uaddr;
521 if (params.len > SEV_FW_BLOB_MAX_SIZE) {
527 blob = kmalloc(params.len, GFP_KERNEL);
531 data->address = __psp_pa(blob);
532 data->len = params.len;
536 data->handle = sev->handle;
537 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);
540 * If we only queried the session length, the FW responded with the expected data.
549 if (copy_to_user(p, blob, params.len))
554 params.len = data->len;
555 if (copy_to_user(measure, &params, sizeof(params)))
564 static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
566 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
567 struct sev_data_launch_finish *data;
573 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
577 data->handle = sev->handle;
578 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);
584 static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
586 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
587 struct kvm_sev_guest_status params;
588 struct sev_data_guest_status *data;
594 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
598 data->handle = sev->handle;
599 ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
603 params.policy = data->policy;
604 params.state = data->state;
605 params.handle = data->handle;
607 if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
614 static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
615 unsigned long dst, int size,
616 int *error, bool enc)
618 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
619 struct sev_data_dbg *data;
622 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
626 data->handle = sev->handle;
627 data->dst_addr = dst;
628 data->src_addr = src;
631 ret = sev_issue_cmd(kvm,
632 enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
638 static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
639 unsigned long dst_paddr, int sz, int *err)
644 * It's safe to read more than we were asked for; the caller should ensure
645 * that the destination has enough space.
647 src_paddr = round_down(src_paddr, 16);
648 offset = src_paddr & 15;
649 sz = round_up(sz + offset, 16);
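/* e.g. src_paddr = 0x1006, sz = 20: offset = 6, so read 32 bytes starting at 0x1000; the caller's destination must have room. */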
651 return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
654 static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
655 unsigned long __user dst_uaddr,
656 unsigned long dst_paddr,
659 struct page *tpage = NULL;
662 /* If the inputs are not 16-byte aligned then use an intermediate buffer */
663 if (!IS_ALIGNED(dst_paddr, 16) ||
664 !IS_ALIGNED(paddr, 16) ||
665 !IS_ALIGNED(size, 16)) {
666 tpage = (void *)alloc_page(GFP_KERNEL);
670 dst_paddr = __sme_page_pa(tpage);
673 ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
679 if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
680 page_address(tpage) + offset, size))
691 static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
692 unsigned long __user vaddr,
693 unsigned long dst_paddr,
694 unsigned long __user dst_vaddr,
695 int size, int *error)
697 struct page *src_tpage = NULL;
698 struct page *dst_tpage = NULL;
701 /* If source buffer is not aligned then use an intermediate buffer */
702 if (!IS_ALIGNED(vaddr, 16)) {
703 src_tpage = alloc_page(GFP_KERNEL);
707 if (copy_from_user(page_address(src_tpage),
708 (void __user *)(uintptr_t)vaddr, size)) {
709 __free_page(src_tpage);
713 paddr = __sme_page_pa(src_tpage);
717 * If the destination buffer or length is not aligned, then do a read-modify-write:
718 * - decrypt destination in an intermediate buffer
719 * - copy the source buffer in an intermediate buffer
720 * - use the intermediate buffer as source buffer
722 if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
725 dst_tpage = alloc_page(GFP_KERNEL);
731 ret = __sev_dbg_decrypt(kvm, dst_paddr,
732 __sme_page_pa(dst_tpage), size, error);
737 * If the source is a kernel buffer then use memcpy(); otherwise use copy_from_user().
740 dst_offset = dst_paddr & 15;
743 memcpy(page_address(dst_tpage) + dst_offset,
744 page_address(src_tpage), size);
746 if (copy_from_user(page_address(dst_tpage) + dst_offset,
747 (void __user *)(uintptr_t)vaddr, size)) {
753 paddr = __sme_page_pa(dst_tpage);
754 dst_paddr = round_down(dst_paddr, 16);
755 len = round_up(size, 16);
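/* The new bytes were merged into the decrypted copy at dst_offset above, so encrypting the 16-byte-aligned region back in place preserves the surrounding data. */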
758 ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);
762 __free_page(src_tpage);
764 __free_page(dst_tpage);
768 static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
770 unsigned long vaddr, vaddr_end, next_vaddr;
771 unsigned long dst_vaddr;
772 struct page **src_p, **dst_p;
773 struct kvm_sev_dbg debug;
781 if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
784 if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
786 if (!debug.dst_uaddr)
789 vaddr = debug.src_uaddr;
791 vaddr_end = vaddr + size;
792 dst_vaddr = debug.dst_uaddr;
794 for (; vaddr < vaddr_end; vaddr = next_vaddr) {
795 int len, s_off, d_off;
797 /* lock userspace source and destination page */
798 src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
800 return PTR_ERR(src_p);
802 dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
804 sev_unpin_memory(kvm, src_p, n);
805 return PTR_ERR(dst_p);
809 * Flush (on non-coherent CPUs) before DBG_{DE,EN}CRYPT read or modify
810 * the pages; flush the destination too so that future accesses do not see stale data.
813 sev_clflush_pages(src_p, 1);
814 sev_clflush_pages(dst_p, 1);
817 * Since user buffer may not be page aligned, calculate the
818 * offset within the page.
820 s_off = vaddr & ~PAGE_MASK;
821 d_off = dst_vaddr & ~PAGE_MASK;
822 len = min_t(size_t, (PAGE_SIZE - s_off), size);
825 ret = __sev_dbg_decrypt_user(kvm,
826 __sme_page_pa(src_p[0]) + s_off,
828 __sme_page_pa(dst_p[0]) + d_off,
831 ret = __sev_dbg_encrypt_user(kvm,
832 __sme_page_pa(src_p[0]) + s_off,
834 __sme_page_pa(dst_p[0]) + d_off,
838 sev_unpin_memory(kvm, src_p, n);
839 sev_unpin_memory(kvm, dst_p, n);
844 next_vaddr = vaddr + len;
845 dst_vaddr = dst_vaddr + len;
852 static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
854 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
855 struct sev_data_launch_secret *data;
856 struct kvm_sev_launch_secret params;
865 if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
868 pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
870 return PTR_ERR(pages);
873 * Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in
874 * place; the cache may contain the data that was written unencrypted.
876 sev_clflush_pages(pages, n);
879 * The secret must be copied into a contiguous memory region; let's verify
880 * that the userspace memory pages are contiguous before we issue the command.
882 if (get_num_contig_pages(0, pages, n) != n) {
888 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
892 offset = params.guest_uaddr & (PAGE_SIZE - 1);
893 data->guest_address = __sme_page_pa(pages[0]) + offset;
894 data->guest_len = params.guest_len;
896 blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
902 data->trans_address = __psp_pa(blob);
903 data->trans_len = params.trans_len;
905 hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
910 data->hdr_address = __psp_pa(hdr);
911 data->hdr_len = params.hdr_len;
913 data->handle = sev->handle;
914 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
923 /* content of memory is updated, mark pages dirty */
924 for (i = 0; i < n; i++) {
925 set_page_dirty_lock(pages[i]);
926 mark_page_accessed(pages[i]);
928 sev_unpin_memory(kvm, pages, n);
932 int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
934 struct kvm_sev_cmd sev_cmd;
937 if (!svm_sev_enabled() || !sev)
943 if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
946 mutex_lock(&kvm->lock);
948 switch (sev_cmd.id) {
950 r = sev_guest_init(kvm, &sev_cmd);
952 case KVM_SEV_LAUNCH_START:
953 r = sev_launch_start(kvm, &sev_cmd);
955 case KVM_SEV_LAUNCH_UPDATE_DATA:
956 r = sev_launch_update_data(kvm, &sev_cmd);
958 case KVM_SEV_LAUNCH_MEASURE:
959 r = sev_launch_measure(kvm, &sev_cmd);
961 case KVM_SEV_LAUNCH_FINISH:
962 r = sev_launch_finish(kvm, &sev_cmd);
964 case KVM_SEV_GUEST_STATUS:
965 r = sev_guest_status(kvm, &sev_cmd);
967 case KVM_SEV_DBG_DECRYPT:
968 r = sev_dbg_crypt(kvm, &sev_cmd, true);
970 case KVM_SEV_DBG_ENCRYPT:
971 r = sev_dbg_crypt(kvm, &sev_cmd, false);
973 case KVM_SEV_LAUNCH_SECRET:
974 r = sev_launch_secret(kvm, &sev_cmd);
981 if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
985 mutex_unlock(&kvm->lock);
989 int svm_register_enc_region(struct kvm *kvm,
990 struct kvm_enc_region *range)
992 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
993 struct enc_region *region;
999 if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
1002 region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
1006 region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
1007 if (IS_ERR(region->pages)) {
1008 ret = PTR_ERR(region->pages);
1013 * The guest may change the memory encryption attribute from C=0 -> C=1
1014 * or vice versa for this memory range. Let's make sure caches are
1015 * flushed to ensure that guest data gets written into memory with the correct C-bit.
1018 sev_clflush_pages(region->pages, region->npages);
1020 region->uaddr = range->addr;
1021 region->size = range->size;
1023 mutex_lock(&kvm->lock);
1024 list_add_tail(&region->list, &sev->regions_list);
1025 mutex_unlock(&kvm->lock);
1034 static struct enc_region *
1035 find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
1037 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1038 struct list_head *head = &sev->regions_list;
1039 struct enc_region *i;
1041 list_for_each_entry(i, head, list) {
1042 if (i->uaddr == range->addr &&
1043 i->size == range->size)
1050 static void __unregister_enc_region_locked(struct kvm *kvm,
1051 struct enc_region *region)
1053 sev_unpin_memory(kvm, region->pages, region->npages);
1054 list_del(®ion->list);
1058 int svm_unregister_enc_region(struct kvm *kvm,
1059 struct kvm_enc_region *range)
1061 struct enc_region *region;
1064 mutex_lock(&kvm->lock);
1066 if (!sev_guest(kvm)) {
1071 region = find_enc_region(kvm, range);
1078 * Ensure that all guest tagged cache entries are flushed before
1079 * releasing the pages back to the system for use. CLFLUSH will
1080 * not do this, so issue a WBINVD.
1082 wbinvd_on_all_cpus();
1084 __unregister_enc_region_locked(kvm, region);
1086 mutex_unlock(&kvm->lock);
1090 mutex_unlock(&kvm->lock);
1094 void sev_vm_destroy(struct kvm *kvm)
1096 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1097 struct list_head *head = &sev->regions_list;
1098 struct list_head *pos, *q;
1100 if (!sev_guest(kvm))
1103 mutex_lock(&kvm->lock);
1106 * Ensure that all guest tagged cache entries are flushed before
1107 * releasing the pages back to the system for use. CLFLUSH will
1108 * not do this, so issue a WBINVD.
1110 wbinvd_on_all_cpus();
1113 * If userspace was terminated before unregistering the memory regions,
1114 * then let's unpin all the registered memory.
1116 if (!list_empty(head)) {
1117 list_for_each_safe(pos, q, head) {
1118 __unregister_enc_region_locked(kvm,
1119 list_entry(pos, struct enc_region, list));
1124 mutex_unlock(&kvm->lock);
1126 sev_unbind_asid(kvm, sev->handle);
1127 sev_asid_free(sev->asid);
1130 void __init sev_hardware_setup(void)
1132 unsigned int eax, ebx, ecx, edx;
1133 bool sev_es_supported = false;
1134 bool sev_supported = false;
1136 /* Does the CPU support SEV? */
1137 if (!boot_cpu_has(X86_FEATURE_SEV))
1140 /* Retrieve SEV CPUID information */
1141 cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
1143 /* Maximum number of encrypted guests supported simultaneously */
1146 if (!svm_sev_enabled())
1149 /* Minimum ASID value that should be used for SEV guest */
1152 /* Initialize SEV ASID bitmaps */
1153 sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
1154 if (!sev_asid_bitmap)
1157 sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
1158 if (!sev_reclaim_asid_bitmap)
1161 pr_info("SEV supported: %u ASIDs\n", max_sev_asid - min_sev_asid + 1);
1162 sev_supported = true;
1164 /* SEV-ES support requested? */
1168 /* Does the CPU support SEV-ES? */
1169 if (!boot_cpu_has(X86_FEATURE_SEV_ES))
1172 /* Has the system been allocated ASIDs for SEV-ES? */
1173 if (min_sev_asid == 1)
1176 pr_info("SEV-ES supported: %u ASIDs\n", min_sev_asid - 1);
1177 sev_es_supported = true;
1180 sev = sev_supported;
1181 sev_es = sev_es_supported;
1184 void sev_hardware_teardown(void)
1186 if (!svm_sev_enabled())
1189 bitmap_free(sev_asid_bitmap);
1190 bitmap_free(sev_reclaim_asid_bitmap);
1196 * Pages used by hardware to hold guest encrypted state must be flushed before
1197 * returning them to the system.
1199 static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
1203 * If hardware enforced cache coherency for encrypted mappings of the
1204 * same physical page is supported, nothing to do.
1206 if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
1210 * If the VM Page Flush MSR is supported, use it to flush the page
1211 * (using the page virtual address and the guest ASID).
1213 if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
1214 struct kvm_sev_info *sev;
1215 unsigned long va_start;
1218 /* Align start and stop to page boundaries. */
1219 va_start = (unsigned long)va;
1220 start = (u64)va_start & PAGE_MASK;
1221 stop = PAGE_ALIGN((u64)va_start + len);
1224 sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
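/* Per the APM, MSR_AMD64_VM_PAGE_FLUSH takes the page-aligned virtual address in the upper bits and the guest ASID in the low bits; each write flushes one page. */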
1226 while (start < stop) {
1227 wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
1236 WARN(1, "Address overflow, using WBINVD\n");
1240 * Hardware should always have one of the above features,
1241 * but if not, use WBINVD and issue a warning.
1243 WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
1244 wbinvd_on_all_cpus();
1247 void sev_free_vcpu(struct kvm_vcpu *vcpu)
1249 struct vcpu_svm *svm;
1251 if (!sev_es_guest(vcpu->kvm))
1256 if (vcpu->arch.guest_state_protected)
1257 sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
1258 __free_page(virt_to_page(svm->vmsa));
1261 static void dump_ghcb(struct vcpu_svm *svm)
1263 struct ghcb *ghcb = svm->ghcb;
1266 /* Re-use the dump_invalid_vmcb module parameter */
1267 if (!dump_invalid_vmcb) {
1268 pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
1272 nbits = sizeof(ghcb->save.valid_bitmap) * 8;
1274 pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
1275 pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
1276 ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
1277 pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
1278 ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
1279 pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
1280 ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
1281 pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
1282 ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
1283 pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
1286 static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
1288 struct kvm_vcpu *vcpu = &svm->vcpu;
1289 struct ghcb *ghcb = svm->ghcb;
1292 * The GHCB protocol so far allows for the following data to be returned:
1294 * GPRs RAX, RBX, RCX, RDX
1296 * Copy their values to the GHCB if they are dirty.
1298 if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX))
1299 ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
1300 if (kvm_register_is_dirty(vcpu, VCPU_REGS_RBX))
1301 ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
1302 if (kvm_register_is_dirty(vcpu, VCPU_REGS_RCX))
1303 ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
1304 if (kvm_register_is_dirty(vcpu, VCPU_REGS_RDX))
1305 ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
1308 static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
1310 struct vmcb_control_area *control = &svm->vmcb->control;
1311 struct kvm_vcpu *vcpu = &svm->vcpu;
1312 struct ghcb *ghcb = svm->ghcb;
1316 * The GHCB protocol so far allows for the following data to be supplied:
1318 * GPRs RAX, RBX, RCX, RDX
1322 * VMMCALL allows the guest to provide extra registers. KVM also
1323 * expects RSI for hypercalls, so include that, too.
1325 * Copy their values to the appropriate location if supplied.
1327 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
1329 vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
1330 vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
1331 vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
1332 vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
1333 vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);
1335 svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);
1337 if (ghcb_xcr0_is_valid(ghcb)) {
1338 vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
1339 kvm_update_cpuid_runtime(vcpu);
1342 /* Copy the GHCB exit information into the VMCB fields */
1343 exit_code = ghcb_get_sw_exit_code(ghcb);
1344 control->exit_code = lower_32_bits(exit_code);
1345 control->exit_code_hi = upper_32_bits(exit_code);
1346 control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
1347 control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);
1349 /* Clear the valid entries fields */
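/* (Stale register values from this exit must not be treated as valid on the next VMGEXIT.) */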
1350 memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
1353 static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
1355 struct kvm_vcpu *vcpu;
1361 /* Only GHCB Usage code 0 is supported */
1362 if (ghcb->ghcb_usage)
1366 * Retrieve the exit code now even though it may not be marked valid
1367 * as it could help with debugging.
1369 exit_code = ghcb_get_sw_exit_code(ghcb);
1371 if (!ghcb_sw_exit_code_is_valid(ghcb) ||
1372 !ghcb_sw_exit_info_1_is_valid(ghcb) ||
1373 !ghcb_sw_exit_info_2_is_valid(ghcb))
1376 switch (ghcb_get_sw_exit_code(ghcb)) {
1377 case SVM_EXIT_READ_DR7:
1379 case SVM_EXIT_WRITE_DR7:
1380 if (!ghcb_rax_is_valid(ghcb))
1383 case SVM_EXIT_RDTSC:
1385 case SVM_EXIT_RDPMC:
1386 if (!ghcb_rcx_is_valid(ghcb))
1389 case SVM_EXIT_CPUID:
1390 if (!ghcb_rax_is_valid(ghcb) ||
1391 !ghcb_rcx_is_valid(ghcb))
1393 if (ghcb_get_rax(ghcb) == 0xd)
1394 if (!ghcb_xcr0_is_valid(ghcb))
1400 if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
1401 if (!ghcb_rax_is_valid(ghcb))
1405 if (!ghcb_rcx_is_valid(ghcb))
1407 if (ghcb_get_sw_exit_info_1(ghcb)) {
1408 if (!ghcb_rax_is_valid(ghcb) ||
1409 !ghcb_rdx_is_valid(ghcb))
1413 case SVM_EXIT_VMMCALL:
1414 if (!ghcb_rax_is_valid(ghcb) ||
1415 !ghcb_cpl_is_valid(ghcb))
1418 case SVM_EXIT_RDTSCP:
1420 case SVM_EXIT_WBINVD:
1422 case SVM_EXIT_MONITOR:
1423 if (!ghcb_rax_is_valid(ghcb) ||
1424 !ghcb_rcx_is_valid(ghcb) ||
1425 !ghcb_rdx_is_valid(ghcb))
1428 case SVM_EXIT_MWAIT:
1429 if (!ghcb_rax_is_valid(ghcb) ||
1430 !ghcb_rcx_is_valid(ghcb))
1433 case SVM_VMGEXIT_UNSUPPORTED_EVENT:
1444 if (ghcb->ghcb_usage) {
1445 vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
1448 vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n",
1453 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1454 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
1455 vcpu->run->internal.ndata = 2;
1456 vcpu->run->internal.data[0] = exit_code;
1457 vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
1462 static void pre_sev_es_run(struct vcpu_svm *svm)
1467 sev_es_sync_to_ghcb(svm);
1469 kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
1473 void pre_sev_run(struct vcpu_svm *svm, int cpu)
1475 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
1476 int asid = sev_get_asid(svm->vcpu.kvm);
1478 /* Perform any SEV-ES pre-run actions */
1479 pre_sev_es_run(svm);
1481 /* Assign the asid allocated with this SEV guest */
1487 * 1) when a different VMCB for the same ASID is to be run on the same host CPU, or
1488 * 2) this VMCB was executed on a different host CPU in previous VMRUNs.
1490 if (sd->sev_vmcbs[asid] == svm->vmcb &&
1491 svm->vcpu.arch.last_vmentry_cpu == cpu)
1494 sd->sev_vmcbs[asid] = svm->vmcb;
1495 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
1496 vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
1499 static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
1504 int sev_handle_vmgexit(struct vcpu_svm *svm)
1506 struct vmcb_control_area *control = &svm->vmcb->control;
1507 u64 ghcb_gpa, exit_code;
1511 /* Validate the GHCB */
1512 ghcb_gpa = control->ghcb_gpa;
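/* Low bits set in the GHCB GPA indicate a GHCB MSR protocol request rather than a guest-mapped GHCB page. */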
1513 if (ghcb_gpa & GHCB_MSR_INFO_MASK)
1514 return sev_handle_vmgexit_msr_protocol(svm);
1517 vcpu_unimpl(&svm->vcpu, "vmgexit: GHCB gpa is not set\n");
1521 if (kvm_vcpu_map(&svm->vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
1522 /* Unable to map GHCB from guest */
1523 vcpu_unimpl(&svm->vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
1528 svm->ghcb = svm->ghcb_map.hva;
1529 ghcb = svm->ghcb_map.hva;
1531 exit_code = ghcb_get_sw_exit_code(ghcb);
1533 ret = sev_es_validate_vmgexit(svm);
1537 sev_es_sync_from_ghcb(svm);
1538 ghcb_set_sw_exit_info_1(ghcb, 0);
1539 ghcb_set_sw_exit_info_2(ghcb, 0);
1542 switch (exit_code) {
1543 case SVM_VMGEXIT_UNSUPPORTED_EVENT:
1544 vcpu_unimpl(&svm->vcpu,
1545 "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
1546 control->exit_info_1, control->exit_info_2);
1549 ret = svm_invoke_exit_handler(svm, exit_code);