x86/sgx: Wipe out EREMOVE from sgx_free_epc_page()
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 8df81a3..b227629 100644
@@ -23,42 +23,58 @@ static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq);
  * with sgx_reclaimer_lock acquired.
  */
 static LIST_HEAD(sgx_active_page_list);
-
 static DEFINE_SPINLOCK(sgx_reclaimer_lock);
 
+/* Number of free EPC pages; protected by the per-node free page list locks. */
+static unsigned long sgx_nr_free_pages;
+
+/* Nodes with one or more EPC sections. */
+static nodemask_t sgx_numa_mask;
+
+/*
+ * Array with one entry for each possible NUMA node.  Each entry holds the
+ * free page list (and its lock) for the EPC pages located on that node.
+ */
+static struct sgx_numa_node *sgx_numa_nodes;
+
+static LIST_HEAD(sgx_dirty_page_list);
+
 /*
- * Reset dirty EPC pages to uninitialized state. Laundry can be left with SECS
- * pages whose child pages blocked EREMOVE.
+ * Reset post-kexec EPC pages to the uninitialized state. The pages are removed
+ * from the input list, and made available for the page allocator. SECS pages
+ * whose child pages block EREMOVE are left on the input list for a later pass.
  */
-static void sgx_sanitize_section(struct sgx_epc_section *section)
+static void __sgx_sanitize_pages(struct list_head *dirty_page_list)
 {
        struct sgx_epc_page *page;
        LIST_HEAD(dirty);
        int ret;
 
-       /* init_laundry_list is thread-local, no need for a lock: */
-       while (!list_empty(&section->init_laundry_list)) {
+       /* dirty_page_list is thread-local, no need for a lock: */
+       while (!list_empty(dirty_page_list)) {
                if (kthread_should_stop())
                        return;
 
-               /* needed for access to ->page_list: */
-               spin_lock(&section->lock);
-
-               page = list_first_entry(&section->init_laundry_list,
-                                       struct sgx_epc_page, list);
+               page = list_first_entry(dirty_page_list, struct sgx_epc_page, list);
 
                ret = __eremove(sgx_get_epc_virt_addr(page));
-               if (!ret)
-                       list_move(&page->list, &section->page_list);
-               else
+               if (!ret) {
+                       /*
+                        * page is now sanitized.  Make it available via the SGX
+                        * page allocator:
+                        */
+                       list_del(&page->list);
+                       sgx_free_epc_page(page);
+               } else {
+                       /* The page is not yet clean - move to the dirty list. */
                        list_move_tail(&page->list, &dirty);
-
-               spin_unlock(&section->lock);
+               }
 
                cond_resched();
        }
 
-       list_splice(&dirty, &section->init_laundry_list);
+       list_splice(&dirty, dirty_page_list);
 }
 
 static bool sgx_reclaimer_age(struct sgx_epc_page *epc_page)
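The per-node bookkeeping above assumes a small structure added to sgx.h by the
same series. A minimal sketch, with the fields inferred from their use in this
file:

	struct sgx_numa_node {
		struct list_head free_page_list; /* free EPC pages on the node */
		spinlock_t lock;                 /* protects free_page_list and
						  * the sgx_nr_free_pages count */
	};
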
@@ -278,7 +294,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
 
                sgx_encl_ewb(encl->secs.epc_page, &secs_backing);
 
-               sgx_free_epc_page(encl->secs.epc_page);
+               sgx_encl_free_epc_page(encl->secs.epc_page);
                encl->secs.epc_page = NULL;
 
                sgx_encl_put_backing(&secs_backing, true);
@@ -308,6 +324,7 @@ static void sgx_reclaim_pages(void)
        struct sgx_epc_section *section;
        struct sgx_encl_page *encl_page;
        struct sgx_epc_page *epc_page;
+       struct sgx_numa_node *node;
        pgoff_t page_index;
        int cnt = 0;
        int ret;
@@ -379,50 +396,33 @@ skip:
                epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
 
                section = &sgx_epc_sections[epc_page->section];
-               spin_lock(&section->lock);
-               list_add_tail(&epc_page->list, &section->page_list);
-               section->free_cnt++;
-               spin_unlock(&section->lock);
-       }
-}
-
-static unsigned long sgx_nr_free_pages(void)
-{
-       unsigned long cnt = 0;
-       int i;
-
-       for (i = 0; i < sgx_nr_epc_sections; i++)
-               cnt += sgx_epc_sections[i].free_cnt;
+               node = section->node;
 
-       return cnt;
+               spin_lock(&node->lock);
+               list_add_tail(&epc_page->list, &node->free_page_list);
+               sgx_nr_free_pages++;
+               spin_unlock(&node->lock);
+       }
 }
 
 static bool sgx_should_reclaim(unsigned long watermark)
 {
-       return sgx_nr_free_pages() < watermark &&
-              !list_empty(&sgx_active_page_list);
+       return sgx_nr_free_pages < watermark && !list_empty(&sgx_active_page_list);
 }
 
 static int ksgxd(void *p)
 {
-       int i;
-
        set_freezable();
 
        /*
         * Sanitize pages in order to recover from kexec(). The 2nd pass is
         * required for SECS pages, whose child pages blocked EREMOVE.
         */
-       for (i = 0; i < sgx_nr_epc_sections; i++)
-               sgx_sanitize_section(&sgx_epc_sections[i]);
+       __sgx_sanitize_pages(&sgx_dirty_page_list);
+       __sgx_sanitize_pages(&sgx_dirty_page_list);
 
-       for (i = 0; i < sgx_nr_epc_sections; i++) {
-               sgx_sanitize_section(&sgx_epc_sections[i]);
-
-               /* Should never happen. */
-               if (!list_empty(&sgx_epc_sections[i].init_laundry_list))
-                       WARN(1, "EPC section %d has unsanitized pages.\n", i);
-       }
+       /* sanity check: */
+       WARN_ON(!list_empty(&sgx_dirty_page_list));
 
        while (!kthread_should_stop()) {
                if (try_to_freeze())
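A short worked example of why the two sanitization passes above are enough,
assuming an enclave that survived kexec() with one SECS page plus children:

	/*
	 * Pass 1: EREMOVE succeeds on every child page; EREMOVE on the SECS
	 *         page fails because its children still exist, so the page is
	 *         spliced back onto sgx_dirty_page_list.
	 * Pass 2: the children are gone, EREMOVE on the SECS page succeeds,
	 *         and sgx_dirty_page_list drains to empty.
	 */
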
@@ -454,45 +454,56 @@ static bool __init sgx_page_reclaimer_init(void)
        return true;
 }
 
-static struct sgx_epc_page *__sgx_alloc_epc_page_from_section(struct sgx_epc_section *section)
+static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
 {
-       struct sgx_epc_page *page;
+       struct sgx_numa_node *node = &sgx_numa_nodes[nid];
+       struct sgx_epc_page *page = NULL;
 
-       spin_lock(&section->lock);
+       spin_lock(&node->lock);
 
-       if (list_empty(&section->page_list)) {
-               spin_unlock(&section->lock);
+       if (list_empty(&node->free_page_list)) {
+               spin_unlock(&node->lock);
                return NULL;
        }
 
-       page = list_first_entry(&section->page_list, struct sgx_epc_page, list);
+       page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list);
        list_del_init(&page->list);
-       section->free_cnt--;
+       sgx_nr_free_pages--;
+
+       spin_unlock(&node->lock);
 
-       spin_unlock(&section->lock);
        return page;
 }
 
 /**
  * __sgx_alloc_epc_page() - Allocate an EPC page
  *
- * Iterate through EPC sections and borrow a free EPC page to the caller. When a
- * page is no longer needed it must be released with sgx_free_epc_page().
+ * Iterate through NUMA nodes and reserve a free EPC page for the caller. Start
+ * from the NUMA node where the caller is executing.
  *
  * Return:
- *   an EPC page,
- *   -errno on error
+ * - an EPC page:      A borrowed EPC page was available.
+ * - NULL:             Out of EPC pages.
  */
 struct sgx_epc_page *__sgx_alloc_epc_page(void)
 {
-       struct sgx_epc_section *section;
        struct sgx_epc_page *page;
-       int i;
+       int nid_of_current = numa_node_id();
+       int nid = nid_of_current;
 
-       for (i = 0; i < sgx_nr_epc_sections; i++) {
-               section = &sgx_epc_sections[i];
+       if (node_isset(nid_of_current, sgx_numa_mask)) {
+               page = __sgx_alloc_epc_page_from_node(nid_of_current);
+               if (page)
+                       return page;
+       }
+
+       /* Fall back to the non-local NUMA nodes: */
+       while (true) {
+               nid = next_node_in(nid, sgx_numa_mask);
+               if (nid == nid_of_current)
+                       break;
 
-               page = __sgx_alloc_epc_page_from_section(section);
+               page = __sgx_alloc_epc_page_from_node(nid);
                if (page)
                        return page;
        }
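The wrap-around of the fallback loop comes from next_node_in(), which cycles
through the set bits of a nodemask. A worked example, assuming EPC on nodes 0
and 2 with the caller running on node 2:

	/*
	 * Local attempt: node 2 is tried first, its free list is empty.
	 * next_node_in(2, sgx_numa_mask) == 0 -> try node 0.
	 * next_node_in(0, sgx_numa_mask) == 2 -> equals nid_of_current, stop.
	 *
	 * Each node in sgx_numa_mask is therefore tried exactly once before
	 * the function gives up.
	 */
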
@@ -598,23 +609,22 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim)
  * sgx_free_epc_page() - Free an EPC page
  * @page:      an EPC page
  *
- * Call EREMOVE for an EPC page and insert it back to the list of free pages.
+ * Put the EPC page back on the list of free pages. It's the caller's
+ * responsibility to make sure that the page is in the uninitialized state. In
+ * other words, do EREMOVE, EWB or whatever operation is necessary before
+ * calling this function.
  */
 void sgx_free_epc_page(struct sgx_epc_page *page)
 {
        struct sgx_epc_section *section = &sgx_epc_sections[page->section];
-       int ret;
+       struct sgx_numa_node *node = section->node;
 
-       WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);
+       spin_lock(&node->lock);
 
-       ret = __eremove(sgx_get_epc_virt_addr(page));
-       if (WARN_ONCE(ret, "EREMOVE returned %d (0x%x)", ret, ret))
-               return;
+       list_add_tail(&page->list, &node->free_page_list);
+       sgx_nr_free_pages++;
 
-       spin_lock(&section->lock);
-       list_add_tail(&page->list, &section->page_list);
-       section->free_cnt++;
-       spin_unlock(&section->lock);
+       spin_unlock(&node->lock);
 }
 
 static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
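The EREMOVE-plus-WARN logic deleted above does not go away: per the subject
line it moves into sgx_encl_free_epc_page() (called from the reclaimer earlier
in this diff). A sketch of that counterpart in encl.c, reconstructed from the
lines removed here:

	void sgx_encl_free_epc_page(struct sgx_epc_page *page)
	{
		int ret;

		WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);

		ret = __eremove(sgx_get_epc_virt_addr(page));
		if (WARN_ONCE(ret, "EREMOVE returned %d (0x%x)", ret, ret))
			return;

		sgx_free_epc_page(page);
	}
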
@@ -635,18 +645,15 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
        }
 
        section->phys_addr = phys_addr;
-       spin_lock_init(&section->lock);
-       INIT_LIST_HEAD(&section->page_list);
-       INIT_LIST_HEAD(&section->init_laundry_list);
 
        for (i = 0; i < nr_pages; i++) {
                section->pages[i].section = index;
                section->pages[i].flags = 0;
                section->pages[i].owner = NULL;
-               list_add_tail(&section->pages[i].list, &section->init_laundry_list);
+               list_add_tail(&section->pages[i].list, &sgx_dirty_page_list);
        }
 
-       section->free_cnt = nr_pages;
+       sgx_nr_free_pages += nr_pages;
        return true;
 }
 
@@ -665,8 +672,13 @@ static bool __init sgx_page_cache_init(void)
 {
        u32 eax, ebx, ecx, edx, type;
        u64 pa, size;
+       int nid;
        int i;
 
+       sgx_numa_nodes = kmalloc_array(num_possible_nodes(), sizeof(*sgx_numa_nodes), GFP_KERNEL);
+       if (!sgx_numa_nodes)
+               return false;
+
        for (i = 0; i < ARRAY_SIZE(sgx_epc_sections); i++) {
                cpuid_count(SGX_CPUID, i + SGX_CPUID_EPC, &eax, &ebx, &ecx, &edx);
 
@@ -689,6 +701,21 @@ static bool __init sgx_page_cache_init(void)
                        break;
                }
 
+               nid = numa_map_to_online_node(phys_to_target_node(pa));
+               if (nid == NUMA_NO_NODE) {
+                       /* The physical address is already printed above. */
+                       pr_warn(FW_BUG "Unable to map EPC section to online node. Falling back to NUMA node 0.\n");
+                       nid = 0;
+               }
+
+               if (!node_isset(nid, sgx_numa_mask)) {
+                       spin_lock_init(&sgx_numa_nodes[nid].lock);
+                       INIT_LIST_HEAD(&sgx_numa_nodes[nid].free_page_list);
+                       node_set(nid, sgx_numa_mask);
+               }
+
+               sgx_epc_sections[i].node = &sgx_numa_nodes[nid];
+
                sgx_nr_epc_sections++;
        }