x86/sgx: Add helper to update SGX_LEPUBKEYHASHn MSRs
[linux-2.6-microblaze.git] arch/x86/kernel/cpu/sgx/main.c
// SPDX-License-Identifier: GPL-2.0
/*  Copyright(c) 2016-20 Intel Corporation. */

#include <linux/freezer.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/pagemap.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include "driver.h"
#include "encl.h"
#include "encls.h"

struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
static int sgx_nr_epc_sections;
static struct task_struct *ksgxd_tsk;
static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq);

/*
 * These variables are part of the state of the reclaimer, and must be accessed
 * with sgx_reclaimer_lock acquired.
 */
static LIST_HEAD(sgx_active_page_list);
static DEFINE_SPINLOCK(sgx_reclaimer_lock);

/* The number of free EPC pages; updated under the per-node free page list locks. */
static unsigned long sgx_nr_free_pages;

/* Nodes with one or more EPC sections. */
static nodemask_t sgx_numa_mask;

/*
 * Array with one list_head for each possible NUMA node.  Each
 * list contains all the sgx_epc_section's which are on that
 * node.
 */
static struct sgx_numa_node *sgx_numa_nodes;

static LIST_HEAD(sgx_dirty_page_list);

/*
 * Reset post-kexec EPC pages to the uninitialized state. The pages are removed
 * from the input list, and made available for the page allocator. SECS pages
 * that precede their child pages in the input list are left intact, as EREMOVE
 * fails on a SECS page until all of its children have been removed.
 */
static void __sgx_sanitize_pages(struct list_head *dirty_page_list)
{
        struct sgx_epc_page *page;
        LIST_HEAD(dirty);
        int ret;

        /* dirty_page_list is thread-local, no need for a lock: */
        while (!list_empty(dirty_page_list)) {
                if (kthread_should_stop())
                        return;

                page = list_first_entry(dirty_page_list, struct sgx_epc_page, list);

                ret = __eremove(sgx_get_epc_virt_addr(page));
                if (!ret) {
                        /*
                         * page is now sanitized.  Make it available via the SGX
                         * page allocator:
                         */
                        list_del(&page->list);
                        sgx_free_epc_page(page);
                } else {
                        /* The page is not yet clean - move to the dirty list. */
                        list_move_tail(&page->list, &dirty);
                }

                cond_resched();
        }

        list_splice(&dirty, dirty_page_list);
}

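/*
 * Return true if the page has not been accessed ("young") via any mm attached
 * to the enclave since the last scan, i.e. the page has aged and is a good
 * reclaim candidate. The accessed bit is cleared as a side effect.
 */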
static bool sgx_reclaimer_age(struct sgx_epc_page *epc_page)
{
        struct sgx_encl_page *page = epc_page->owner;
        struct sgx_encl *encl = page->encl;
        struct sgx_encl_mm *encl_mm;
        bool ret = true;
        int idx;

        idx = srcu_read_lock(&encl->srcu);

        list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
                if (!mmget_not_zero(encl_mm->mm))
                        continue;

                mmap_read_lock(encl_mm->mm);
                ret = !sgx_encl_test_and_clear_young(encl_mm->mm, page);
                mmap_read_unlock(encl_mm->mm);

                mmput_async(encl_mm->mm);

                if (!ret)
                        break;
        }

        srcu_read_unlock(&encl->srcu, idx);

        if (!ret)
                return false;

        return true;
}

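/*
 * Zap the PTEs for the page in every mm that maps the enclave and then mark
 * the page as blocked with EBLOCK, so that no new TLB entries can be created
 * for it. The mm_list walk is retried if the list changes underneath it.
 */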
static void sgx_reclaimer_block(struct sgx_epc_page *epc_page)
{
        struct sgx_encl_page *page = epc_page->owner;
        unsigned long addr = page->desc & PAGE_MASK;
        struct sgx_encl *encl = page->encl;
        unsigned long mm_list_version;
        struct sgx_encl_mm *encl_mm;
        struct vm_area_struct *vma;
        int idx, ret;

        do {
                mm_list_version = encl->mm_list_version;

                /* Pairs with smp_rmb() in sgx_encl_mm_add(). */
                smp_rmb();

                idx = srcu_read_lock(&encl->srcu);

                list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
                        if (!mmget_not_zero(encl_mm->mm))
                                continue;

                        mmap_read_lock(encl_mm->mm);

                        ret = sgx_encl_find(encl_mm->mm, addr, &vma);
                        if (!ret && encl == vma->vm_private_data)
                                zap_vma_ptes(vma, addr, PAGE_SIZE);

                        mmap_read_unlock(encl_mm->mm);

                        mmput_async(encl_mm->mm);
                }

                srcu_read_unlock(&encl->srcu, idx);
        } while (unlikely(encl->mm_list_version != mm_list_version));

        mutex_lock(&encl->lock);

        ret = __eblock(sgx_get_epc_virt_addr(epc_page));
        if (encls_failed(ret))
                ENCLS_WARN(ret, "EBLOCK");

        mutex_unlock(&encl->lock);
}

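/*
 * Write a single EPC page and its PCMD metadata out to the regular memory
 * backing with EWB. The version of the evicted page is stored in @va_slot.
 * Returns the ENCLS[EWB] return code.
 */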
static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot,
                          struct sgx_backing *backing)
{
        struct sgx_pageinfo pginfo;
        int ret;

        pginfo.addr = 0;
        pginfo.secs = 0;

        pginfo.contents = (unsigned long)kmap_atomic(backing->contents);
        pginfo.metadata = (unsigned long)kmap_atomic(backing->pcmd) +
                          backing->pcmd_offset;

        ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot);

        kunmap_atomic((void *)(unsigned long)(pginfo.metadata -
                                              backing->pcmd_offset));
        kunmap_atomic((void *)(unsigned long)pginfo.contents);

        return ret;
}

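/*
 * The IPI callback is intentionally empty: delivering the interrupt is enough
 * to force the targeted CPUs out of the enclave (asynchronous exit), which is
 * all the EWB slow path needs.
 */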
static void sgx_ipi_cb(void *info)
{
}

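/*
 * Build the set of CPUs that may have executed inside one of the enclave's
 * mm's and could therefore still be running in the enclave. The mask is used
 * to target the IPIs that kick those CPUs out before EWB is retried.
 */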
static const cpumask_t *sgx_encl_ewb_cpumask(struct sgx_encl *encl)
{
        cpumask_t *cpumask = &encl->cpumask;
        struct sgx_encl_mm *encl_mm;
        int idx;

        /*
         * Can race with sgx_encl_mm_add(), but ETRACK has already been
         * executed, which means that the CPUs running in the new mm will enter
         * into the enclave with a fresh epoch.
         */
        cpumask_clear(cpumask);

        idx = srcu_read_lock(&encl->srcu);

        list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
                if (!mmget_not_zero(encl_mm->mm))
                        continue;

                cpumask_or(cpumask, cpumask, mm_cpumask(encl_mm->mm));

                mmput_async(encl_mm->mm);
        }

        srcu_read_unlock(&encl->srcu, idx);

        return cpumask;
}

/*
 * Swap a page that has been put into the blocked state with EBLOCK, which
 * means that it can no longer be referenced (no new TLB entries), out to
 * regular memory.
 *
 * The first attempt just tries to write the page, assuming that some other
 * thread has already reset the count of threads inside the enclave with
 * ETRACK and the previous thread count has dropped to zero. The second
 * attempt calls ETRACK before EWB. If that also fails, all HW threads are
 * kicked out of the enclave with IPIs and EWB is retried, which is then
 * guaranteed to succeed.
 */
static void sgx_encl_ewb(struct sgx_epc_page *epc_page,
                         struct sgx_backing *backing)
{
        struct sgx_encl_page *encl_page = epc_page->owner;
        struct sgx_encl *encl = encl_page->encl;
        struct sgx_va_page *va_page;
        unsigned int va_offset;
        void *va_slot;
        int ret;

        encl_page->desc &= ~SGX_ENCL_PAGE_BEING_RECLAIMED;

        va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
                                   list);
        va_offset = sgx_alloc_va_slot(va_page);
        va_slot = sgx_get_epc_virt_addr(va_page->epc_page) + va_offset;
        if (sgx_va_page_full(va_page))
                list_move_tail(&va_page->list, &encl->va_pages);

        ret = __sgx_encl_ewb(epc_page, va_slot, backing);
        if (ret == SGX_NOT_TRACKED) {
                ret = __etrack(sgx_get_epc_virt_addr(encl->secs.epc_page));
                if (ret) {
                        if (encls_failed(ret))
                                ENCLS_WARN(ret, "ETRACK");
                }

                ret = __sgx_encl_ewb(epc_page, va_slot, backing);
                if (ret == SGX_NOT_TRACKED) {
                        /*
                         * Slow path, send IPIs to kick cpus out of the
                         * enclave.  Note, it's imperative that the cpu
                         * mask is generated *after* ETRACK, else we'll
                         * miss cpus that entered the enclave between
                         * generating the mask and incrementing epoch.
                         */
                        on_each_cpu_mask(sgx_encl_ewb_cpumask(encl),
                                         sgx_ipi_cb, NULL, 1);
                        ret = __sgx_encl_ewb(epc_page, va_slot, backing);
                }
        }

        if (ret) {
                if (encls_failed(ret))
                        ENCLS_WARN(ret, "EWB");

                sgx_free_va_slot(va_page, va_offset);
        } else {
                encl_page->desc |= va_offset;
                encl_page->va_page = va_page;
        }
}

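/*
 * Write the page out to its backing storage and release the EPC page. If the
 * last child page of an initialized enclave is reclaimed, evict and free the
 * SECS page as well.
 */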
static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
                                struct sgx_backing *backing)
{
        struct sgx_encl_page *encl_page = epc_page->owner;
        struct sgx_encl *encl = encl_page->encl;
        struct sgx_backing secs_backing;
        int ret;

        mutex_lock(&encl->lock);

        sgx_encl_ewb(epc_page, backing);
        encl_page->epc_page = NULL;
        encl->secs_child_cnt--;

        if (!encl->secs_child_cnt && test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) {
                ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size),
                                           &secs_backing);
                if (ret)
                        goto out;

                sgx_encl_ewb(encl->secs.epc_page, &secs_backing);

                sgx_encl_free_epc_page(encl->secs.epc_page);
                encl->secs.epc_page = NULL;

                sgx_encl_put_backing(&secs_backing, true);
        }

out:
        mutex_unlock(&encl->lock);
}

/*
 * Take a fixed number of pages from the head of the active page pool and
 * reclaim them to the enclave's private shmem files. Pages that have been
 * accessed since the last scan are skipped and moved to the tail of the active
 * page pool, so that pages get scanned in an LRU-like fashion.
 *
 * Pages are processed in batches (currently 16) in order to reduce the number
 * of IPIs and ETRACKs potentially required. sgx_encl_ewb() already spreads the
 * cost among the HW threads with its three-stage EWB pipeline (EWB, ETRACK +
 * EWB and IPI + EWB), but not sufficiently. Reclaiming one page at a time would
 * also be problematic as it would increase lock contention too much, which
 * would halt forward progress.
 */
static void sgx_reclaim_pages(void)
{
        struct sgx_epc_page *chunk[SGX_NR_TO_SCAN];
        struct sgx_backing backing[SGX_NR_TO_SCAN];
        struct sgx_epc_section *section;
        struct sgx_encl_page *encl_page;
        struct sgx_epc_page *epc_page;
        struct sgx_numa_node *node;
        pgoff_t page_index;
        int cnt = 0;
        int ret;
        int i;

        spin_lock(&sgx_reclaimer_lock);
        for (i = 0; i < SGX_NR_TO_SCAN; i++) {
                if (list_empty(&sgx_active_page_list))
                        break;

                epc_page = list_first_entry(&sgx_active_page_list,
                                            struct sgx_epc_page, list);
                list_del_init(&epc_page->list);
                encl_page = epc_page->owner;

                if (kref_get_unless_zero(&encl_page->encl->refcount) != 0)
                        chunk[cnt++] = epc_page;
                else
                        /* The owner is freeing the page. No need to add the
                         * page back to the list of reclaimable pages.
                         */
                        epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
        }
        spin_unlock(&sgx_reclaimer_lock);

        for (i = 0; i < cnt; i++) {
                epc_page = chunk[i];
                encl_page = epc_page->owner;

                if (!sgx_reclaimer_age(epc_page))
                        goto skip;

                page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base);
                ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]);
                if (ret)
                        goto skip;

                mutex_lock(&encl_page->encl->lock);
                encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED;
                mutex_unlock(&encl_page->encl->lock);
                continue;

skip:
                spin_lock(&sgx_reclaimer_lock);
                list_add_tail(&epc_page->list, &sgx_active_page_list);
                spin_unlock(&sgx_reclaimer_lock);

                kref_put(&encl_page->encl->refcount, sgx_encl_release);

                chunk[i] = NULL;
        }

        for (i = 0; i < cnt; i++) {
                epc_page = chunk[i];
                if (epc_page)
                        sgx_reclaimer_block(epc_page);
        }

        for (i = 0; i < cnt; i++) {
                epc_page = chunk[i];
                if (!epc_page)
                        continue;

                encl_page = epc_page->owner;
                sgx_reclaimer_write(epc_page, &backing[i]);
                sgx_encl_put_backing(&backing[i], true);

                kref_put(&encl_page->encl->refcount, sgx_encl_release);
                epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;

                section = &sgx_epc_sections[epc_page->section];
                node = section->node;

                spin_lock(&node->lock);
                list_add_tail(&epc_page->list, &node->free_page_list);
                sgx_nr_free_pages++;
                spin_unlock(&node->lock);
        }
}

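/*
 * Reclaim is needed when the number of free EPC pages drops below @watermark
 * and there are still pages on the active page list to reclaim from.
 */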
static bool sgx_should_reclaim(unsigned long watermark)
{
        return sgx_nr_free_pages < watermark && !list_empty(&sgx_active_page_list);
}

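/*
 * ksgxd: kernel thread that sanitizes the EPC after kexec and then reclaims
 * EPC pages in the background whenever the number of free pages drops below
 * the high watermark.
 */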
static int ksgxd(void *p)
{
        set_freezable();

        /*
         * Sanitize pages in order to recover from kexec(). The 2nd pass is
         * required for SECS pages, whose child pages blocked EREMOVE.
         */
        __sgx_sanitize_pages(&sgx_dirty_page_list);
        __sgx_sanitize_pages(&sgx_dirty_page_list);

        /* sanity check: */
        WARN_ON(!list_empty(&sgx_dirty_page_list));

        while (!kthread_should_stop()) {
                if (try_to_freeze())
                        continue;

                wait_event_freezable(ksgxd_waitq,
                                     kthread_should_stop() ||
                                     sgx_should_reclaim(SGX_NR_HIGH_PAGES));

                if (sgx_should_reclaim(SGX_NR_HIGH_PAGES))
                        sgx_reclaim_pages();

                cond_resched();
        }

        return 0;
}

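/* Start ksgxd. Returns false if the kthread could not be created. */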
static bool __init sgx_page_reclaimer_init(void)
{
        struct task_struct *tsk;

        tsk = kthread_run(ksgxd, NULL, "ksgxd");
        if (IS_ERR(tsk))
                return false;

        ksgxd_tsk = tsk;

        return true;
}

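/* Take a free EPC page from the given NUMA node, or return NULL if it has none. */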
static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
{
        struct sgx_numa_node *node = &sgx_numa_nodes[nid];
        struct sgx_epc_page *page = NULL;

        spin_lock(&node->lock);

        if (list_empty(&node->free_page_list)) {
                spin_unlock(&node->lock);
                return NULL;
        }

        page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list);
        list_del_init(&page->list);
        sgx_nr_free_pages--;

        spin_unlock(&node->lock);

        return page;
}

/**
 * __sgx_alloc_epc_page() - Allocate an EPC page
 *
 * Iterate through NUMA nodes and reserve a free EPC page for the caller. Start
 * from the NUMA node where the caller is executing.
 *
 * Return:
 * - an EPC page:               A free EPC page was available.
 * - ERR_PTR(-ENOMEM):          Out of EPC pages.
 */
struct sgx_epc_page *__sgx_alloc_epc_page(void)
{
        struct sgx_epc_page *page;
        int nid_of_current = numa_node_id();
        int nid = nid_of_current;

        if (node_isset(nid_of_current, sgx_numa_mask)) {
                page = __sgx_alloc_epc_page_from_node(nid_of_current);
                if (page)
                        return page;
        }

        /* Fall back to the non-local NUMA nodes: */
        while (true) {
                nid = next_node_in(nid, sgx_numa_mask);
                if (nid == nid_of_current)
                        break;

                page = __sgx_alloc_epc_page_from_node(nid);
                if (page)
                        return page;
        }

        return ERR_PTR(-ENOMEM);
}

/**
 * sgx_mark_page_reclaimable() - Mark a page as reclaimable
 * @page:       EPC page
 *
 * Mark a page as reclaimable and add it to the active page list. Pages
 * are automatically removed from the active list when freed.
 */
void sgx_mark_page_reclaimable(struct sgx_epc_page *page)
{
        spin_lock(&sgx_reclaimer_lock);
        page->flags |= SGX_EPC_PAGE_RECLAIMER_TRACKED;
        list_add_tail(&page->list, &sgx_active_page_list);
        spin_unlock(&sgx_reclaimer_lock);
}

/**
 * sgx_unmark_page_reclaimable() - Remove a page from the reclaim list
 * @page:       EPC page
 *
 * Clear the reclaimable flag and remove the page from the active page list.
 *
 * Return:
 *   0 on success,
 *   -EBUSY if the page is in the process of being reclaimed
 */
int sgx_unmark_page_reclaimable(struct sgx_epc_page *page)
{
        spin_lock(&sgx_reclaimer_lock);
        if (page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) {
                /* The page is being reclaimed. */
                if (list_empty(&page->list)) {
                        spin_unlock(&sgx_reclaimer_lock);
                        return -EBUSY;
                }

                list_del(&page->list);
                page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
        }
        spin_unlock(&sgx_reclaimer_lock);

        return 0;
}

/**
 * sgx_alloc_epc_page() - Allocate an EPC page
 * @owner:      the owner of the EPC page
 * @reclaim:    reclaim pages if necessary
 *
 * Iterate through the EPC sections and reserve a free EPC page for the caller.
 * When a page is no longer needed it must be released with sgx_free_epc_page().
 * If @reclaim is set to true, pages are reclaimed directly when the allocator
 * runs out of them. No mm's can be locked when @reclaim is set to true.
 *
 * Finally, wake up ksgxd when the number of free pages goes below the low
 * watermark before returning to the caller.
 *
 * Return:
 *   an EPC page,
 *   -errno on error
 */
struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim)
{
        struct sgx_epc_page *page;

        for ( ; ; ) {
                page = __sgx_alloc_epc_page();
                if (!IS_ERR(page)) {
                        page->owner = owner;
                        break;
                }

                if (list_empty(&sgx_active_page_list))
                        return ERR_PTR(-ENOMEM);

                if (!reclaim) {
                        page = ERR_PTR(-EBUSY);
                        break;
                }

                if (signal_pending(current)) {
                        page = ERR_PTR(-ERESTARTSYS);
                        break;
                }

                sgx_reclaim_pages();
                cond_resched();
        }

        if (sgx_should_reclaim(SGX_NR_LOW_PAGES))
                wake_up(&ksgxd_waitq);

        return page;
}

/**
 * sgx_free_epc_page() - Free an EPC page
 * @page:       an EPC page
 *
 * Put the EPC page back to the list of free pages. It's the caller's
 * responsibility to make sure that the page is in the uninitialized state. In
 * other words, do EREMOVE, EWB or whatever operation is necessary before
 * calling this function.
 */
void sgx_free_epc_page(struct sgx_epc_page *page)
{
        struct sgx_epc_section *section = &sgx_epc_sections[page->section];
        struct sgx_numa_node *node = section->node;

        spin_lock(&node->lock);

        list_add_tail(&page->list, &node->free_page_list);
        sgx_nr_free_pages++;

        spin_unlock(&node->lock);
}

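/*
 * Map an EPC section into the kernel address space, allocate its page array
 * and queue all of its pages for sanitization by ksgxd.
 */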
static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
                                         unsigned long index,
                                         struct sgx_epc_section *section)
{
        unsigned long nr_pages = size >> PAGE_SHIFT;
        unsigned long i;

        section->virt_addr = memremap(phys_addr, size, MEMREMAP_WB);
        if (!section->virt_addr)
                return false;

        section->pages = vmalloc(nr_pages * sizeof(struct sgx_epc_page));
        if (!section->pages) {
                memunmap(section->virt_addr);
                return false;
        }

        section->phys_addr = phys_addr;

        for (i = 0; i < nr_pages; i++) {
                section->pages[i].section = index;
                section->pages[i].flags = 0;
                section->pages[i].owner = NULL;
                list_add_tail(&section->pages[i].list, &sgx_dirty_page_list);
        }

        sgx_nr_free_pages += nr_pages;
        return true;
}

/**
 * A section metric is concatenated in a way that @low bits 12-31 define the
 * bits 12-31 of the metric and @high bits 0-19 define the bits 32-51 of the
 * metric.
 */
static inline u64 __init sgx_calc_section_metric(u64 low, u64 high)
{
        return (low & GENMASK_ULL(31, 12)) +
               ((high & GENMASK_ULL(19, 0)) << 32);
}

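/*
 * Enumerate the EPC sections from CPUID leaf SGX_CPUID, map each section and
 * assign it to a NUMA node. Returns false if no usable EPC section is found.
 */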
static bool __init sgx_page_cache_init(void)
{
        u32 eax, ebx, ecx, edx, type;
        u64 pa, size;
        int nid;
        int i;

        sgx_numa_nodes = kmalloc_array(num_possible_nodes(), sizeof(*sgx_numa_nodes), GFP_KERNEL);
        if (!sgx_numa_nodes)
                return false;

        for (i = 0; i < ARRAY_SIZE(sgx_epc_sections); i++) {
                cpuid_count(SGX_CPUID, i + SGX_CPUID_EPC, &eax, &ebx, &ecx, &edx);

                type = eax & SGX_CPUID_EPC_MASK;
                if (type == SGX_CPUID_EPC_INVALID)
                        break;

                if (type != SGX_CPUID_EPC_SECTION) {
                        pr_err_once("Unknown EPC section type: %u\n", type);
                        break;
                }

                pa   = sgx_calc_section_metric(eax, ebx);
                size = sgx_calc_section_metric(ecx, edx);

                pr_info("EPC section 0x%llx-0x%llx\n", pa, pa + size - 1);

                if (!sgx_setup_epc_section(pa, size, i, &sgx_epc_sections[i])) {
                        pr_err("No free memory for an EPC section\n");
                        break;
                }

                nid = numa_map_to_online_node(phys_to_target_node(pa));
                if (nid == NUMA_NO_NODE) {
                        /* The physical address is already printed above. */
                        pr_warn(FW_BUG "Unable to map EPC section to online node. Fallback to the NUMA node 0.\n");
                        nid = 0;
                }

                if (!node_isset(nid, sgx_numa_mask)) {
                        spin_lock_init(&sgx_numa_nodes[nid].lock);
                        INIT_LIST_HEAD(&sgx_numa_nodes[nid].free_page_list);
                        node_set(nid, sgx_numa_mask);
                }

                sgx_epc_sections[i].node = &sgx_numa_nodes[nid];

                sgx_nr_epc_sections++;
        }

        if (!sgx_nr_epc_sections) {
                pr_err("There are zero EPC sections.\n");
                return false;
        }

        return true;
}

/*
 * Update the SGX_LEPUBKEYHASH MSRs to the values specified by the caller.
 * The bare-metal driver requires them to be set to the hash of the enclave's
 * signer before EINIT. KVM needs to set them to the guest's virtual MSR values
 * before doing EINIT on behalf of the guest.
 */
void sgx_update_lepubkeyhash(u64 *lepubkeyhash)
{
        int i;

        WARN_ON_ONCE(preemptible());

        for (i = 0; i < 4; i++)
                wrmsrl(MSR_IA32_SGXLEPUBKEYHASH0 + i, lepubkeyhash[i]);
}

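/*
 * Probe the EPC, start ksgxd, and initialize the native driver and the KVM
 * virtual EPC driver. Fails only if both drivers fail to initialize.
 */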
static int __init sgx_init(void)
{
        int ret;
        int i;

        if (!cpu_feature_enabled(X86_FEATURE_SGX))
                return -ENODEV;

        if (!sgx_page_cache_init())
                return -ENOMEM;

        if (!sgx_page_reclaimer_init()) {
                ret = -ENOMEM;
                goto err_page_cache;
        }

        /*
         * Always try to initialize the native *and* KVM drivers.
         * The KVM driver is less picky than the native one and
         * can function if the native one is not supported on the
         * current system or fails to initialize.
         *
         * Error out only if both fail to initialize.
         */
        ret = sgx_drv_init();

        if (sgx_vepc_init() && ret)
                goto err_kthread;

        return 0;

err_kthread:
        kthread_stop(ksgxd_tsk);

err_page_cache:
        for (i = 0; i < sgx_nr_epc_sections; i++) {
                vfree(sgx_epc_sections[i].pages);
                memunmap(sgx_epc_sections[i].virt_addr);
        }

        return ret;
}

device_initcall(sgx_init);