diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index de1f159..73fc517 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -496,11 +496,25 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
        return pmd;
 }
 
-static inline struct list_head *page_deferred_list(struct page *page)
+#ifdef CONFIG_MEMCG
+static inline struct deferred_split *get_deferred_split_queue(struct page *page)
 {
-       /* ->lru in the tail pages is occupied by compound_head. */
-       return &page[2].deferred_list;
+       struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+       struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+
+       if (memcg)
+               return &memcg->deferred_split_queue;
+       else
+               return &pgdat->deferred_split_queue;
+}
+#else
+static inline struct deferred_split *get_deferred_split_queue(struct page *page)
+{
+       struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+
+       return &pgdat->deferred_split_queue;
 }
+#endif
 
 void prep_transhuge_page(struct page *page)
 {
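The new helper assumes that both the node (struct pglist_data) and, with CONFIG_MEMCG, the memory cgroup embed the same queue type, and that page_deferred_list(), still used further down, remains available from a shared header once its local copy is removed here. A minimal sketch of the queue shape these callers rely on, with field names taken from the uses below (the real declarations live outside this hunk):

    /* Sketch of the per-node / per-memcg deferred split queue assumed here. */
    struct deferred_split {
            spinlock_t split_queue_lock;    /* protects the two fields below */
            struct list_head split_queue;   /* THPs queued for deferred split */
            unsigned long split_queue_len;  /* value reported to the shrinker */
    };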
@@ -2497,6 +2511,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        struct page *head = compound_head(page);
        pg_data_t *pgdat = page_pgdat(head);
        struct lruvec *lruvec;
+       struct address_space *swap_cache = NULL;
+       unsigned long offset = 0;
        int i;
 
        lruvec = mem_cgroup_page_lruvec(head, pgdat);
@@ -2504,6 +2520,14 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        /* complete memcg works before add pages to LRU */
        mem_cgroup_split_huge_fixup(head);
 
+       if (PageAnon(head) && PageSwapCache(head)) {
+               swp_entry_t entry = { .val = page_private(head) };
+
+               offset = swp_offset(entry);
+               swap_cache = swap_address_space(entry);
+               xa_lock(&swap_cache->i_pages);
+       }
+
        for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
                __split_huge_page_tail(head, i, lruvec, list);
                /* Some pages can be beyond i_size: drop them from page cache */
@@ -2513,6 +2537,12 @@ static void __split_huge_page(struct page *page, struct list_head *list,
                        if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
                                shmem_uncharge(head->mapping->host, 1);
                        put_page(head + i);
+               } else if (!PageAnon(page)) {
+                       __xa_store(&head->mapping->i_pages, head[i].index,
+                                       head + i, 0);
+               } else if (swap_cache) {
+                       __xa_store(&swap_cache->i_pages, offset + i,
+                                       head + i, 0);
                }
        }
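After this loop, each surviving tail must be individually reachable in whichever xarray used to point at the head: file-backed tails take over their own page-cache slots, while swap-cached anon tails take over consecutive swap-cache slots starting at the head's offset. A hypothetical helper (not part of the patch) restating the loop body's three cases:

    /*
     * Hypothetical restatement of the loop body above.  Tails beyond
     * i_size have already been dropped from the page cache; the rest
     * replace the head pointer in the appropriate xarray.
     */
    static void fixup_split_tail(struct page *head, int i, unsigned long end,
                                 struct address_space *swap_cache,
                                 unsigned long offset)
    {
            if (head[i].index >= end)
                    return;                 /* beyond i_size: already deleted */

            if (!PageAnon(head))            /* file/shmem: own page-cache slot */
                    __xa_store(&head->mapping->i_pages, head[i].index,
                               head + i, 0);
            else if (swap_cache)            /* anon in swap cache: offset + i */
                    __xa_store(&swap_cache->i_pages, offset + i,
                               head + i, 0);
    }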
 
@@ -2523,10 +2553,12 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        /* See comment in __split_huge_page_tail() */
        if (PageAnon(head)) {
                /* Additional pin to swap cache */
-               if (PageSwapCache(head))
+               if (PageSwapCache(head)) {
                        page_ref_add(head, 2);
-               else
+                       xa_unlock(&swap_cache->i_pages);
+               } else {
                        page_ref_inc(head);
+               }
        } else {
                /* Additional pin to page cache */
                page_ref_add(head, 2);
@@ -2673,6 +2705,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
        struct page *head = compound_head(page);
        struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
+       struct deferred_split *ds_queue = get_deferred_split_queue(page);
        struct anon_vma *anon_vma = NULL;
        struct address_space *mapping = NULL;
        int count, mapcount, extra_pins, ret;
@@ -2759,17 +2792,17 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
        }
 
        /* Prevent deferred_split_scan() touching ->_refcount */
-       spin_lock(&pgdata->split_queue_lock);
+       spin_lock(&ds_queue->split_queue_lock);
        count = page_count(head);
        mapcount = total_mapcount(head);
        if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
                if (!list_empty(page_deferred_list(head))) {
-                       pgdata->split_queue_len--;
+                       ds_queue->split_queue_len--;
                        list_del(page_deferred_list(head));
                }
                if (mapping)
                        __dec_node_page_state(page, NR_SHMEM_THPS);
-               spin_unlock(&pgdata->split_queue_lock);
+               spin_unlock(&ds_queue->split_queue_lock);
                __split_huge_page(page, list, end, flags);
                if (PageSwapCache(head)) {
                        swp_entry_t entry = { .val = page_private(head) };
@@ -2786,7 +2819,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                        dump_page(page, "total_mapcount(head) > 0");
                        BUG();
                }
-               spin_unlock(&pgdata->split_queue_lock);
+               spin_unlock(&ds_queue->split_queue_lock);
 fail:          if (mapping)
                        xa_unlock(&mapping->i_pages);
                spin_unlock_irqrestore(&pgdata->lru_lock, flags);
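For context, a hedged caller sketch: in this tree split_huge_page() is a thin wrapper that calls split_huge_page_to_list(page, NULL), and the caller is expected to hold a page reference and the page lock; the deferred-split bookkeeping above then runs against whichever queue get_deferred_split_queue() picked for the page.

    /*
     * Sketch of a typical caller; the page is assumed to already have a
     * reference held by the caller.  0 means the THP was split; a failed
     * split typically returns -EBUSY.
     */
    static int try_split(struct page *page)
    {
            int ret;

            if (!PageTransHuge(page))
                    return 0;

            lock_page(page);
            /* split_huge_page() expands to split_huge_page_to_list(page, NULL) */
            ret = split_huge_page(page);
            unlock_page(page);

            return ret;
    }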
@@ -2808,53 +2841,86 @@ out:
 
 void free_transhuge_page(struct page *page)
 {
-       struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
+       struct deferred_split *ds_queue = get_deferred_split_queue(page);
        unsigned long flags;
 
-       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+       spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
        if (!list_empty(page_deferred_list(page))) {
-               pgdata->split_queue_len--;
+               ds_queue->split_queue_len--;
                list_del(page_deferred_list(page));
        }
-       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+       spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
        free_compound_page(page);
 }
 
 void deferred_split_huge_page(struct page *page)
 {
-       struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
+       struct deferred_split *ds_queue = get_deferred_split_queue(page);
+#ifdef CONFIG_MEMCG
+       struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+#endif
        unsigned long flags;
 
        VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 
-       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+       /*
+        * The try_to_unmap() in the page reclaim path might reach here
+        * too, which may cause a race condition corrupting the deferred
+        * split queue.  And if page reclaim is already handling the same
+        * page, it is unnecessary to handle it again in the shrinker.
+        *
+        * Check PageSwapCache to determine whether the page is being
+        * handled by page reclaim, since THP swap adds the page to the
+        * swap cache before calling try_to_unmap().
+        */
+       if (PageSwapCache(page))
+               return;
+
+       spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
        if (list_empty(page_deferred_list(page))) {
                count_vm_event(THP_DEFERRED_SPLIT_PAGE);
-               list_add_tail(page_deferred_list(page), &pgdata->split_queue);
-               pgdata->split_queue_len++;
+               list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
+               ds_queue->split_queue_len++;
+#ifdef CONFIG_MEMCG
+               if (memcg)
+                       memcg_set_shrinker_bit(memcg, page_to_nid(page),
+                                              deferred_split_shrinker.id);
+#endif
        }
-       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+       spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 }
 
 static unsigned long deferred_split_count(struct shrinker *shrink,
                struct shrink_control *sc)
 {
        struct pglist_data *pgdata = NODE_DATA(sc->nid);
-       return READ_ONCE(pgdata->split_queue_len);
+       struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
+
+#ifdef CONFIG_MEMCG
+       if (sc->memcg)
+               ds_queue = &sc->memcg->deferred_split_queue;
+#endif
+       return READ_ONCE(ds_queue->split_queue_len);
 }
 
 static unsigned long deferred_split_scan(struct shrinker *shrink,
                struct shrink_control *sc)
 {
        struct pglist_data *pgdata = NODE_DATA(sc->nid);
+       struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
        unsigned long flags;
        LIST_HEAD(list), *pos, *next;
        struct page *page;
        int split = 0;
 
-       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+#ifdef CONFIG_MEMCG
+       if (sc->memcg)
+               ds_queue = &sc->memcg->deferred_split_queue;
+#endif
+
+       spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
        /* Take pin on all head pages to avoid freeing them under us */
-       list_for_each_safe(pos, next, &pgdata->split_queue) {
+       list_for_each_safe(pos, next, &ds_queue->split_queue) {
                page = list_entry((void *)pos, struct page, mapping);
                page = compound_head(page);
                if (get_page_unless_zero(page)) {
@@ -2862,12 +2928,12 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
                } else {
                        /* We lost race with put_compound_page() */
                        list_del_init(page_deferred_list(page));
-                       pgdata->split_queue_len--;
+                       ds_queue->split_queue_len--;
                }
                if (!--sc->nr_to_scan)
                        break;
        }
-       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+       spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 
        list_for_each_safe(pos, next, &list) {
                page = list_entry((void *)pos, struct page, mapping);
@@ -2881,15 +2947,15 @@ next:
                put_page(page);
        }
 
-       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
-       list_splice_tail(&list, &pgdata->split_queue);
-       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+       spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+       list_splice_tail(&list, &ds_queue->split_queue);
+       spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 
        /*
         * Stop shrinker if we didn't split any page, but the queue is empty.
         * This can happen if pages were freed under us.
         */
-       if (!split && list_empty(&pgdata->split_queue))
+       if (!split && list_empty(&ds_queue->split_queue))
                return SHRINK_STOP;
        return split;
 }
@@ -2898,7 +2964,8 @@ static struct shrinker deferred_split_shrinker = {
        .count_objects = deferred_split_count,
        .scan_objects = deferred_split_scan,
        .seeks = DEFAULT_SEEKS,
-       .flags = SHRINKER_NUMA_AWARE,
+       .flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE |
+                SHRINKER_NONSLAB,
 };
 
 #ifdef CONFIG_DEBUG_FS
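With SHRINKER_MEMCG_AWARE set, the shrinker is assigned a per-memcg bitmap id at registration time, and that id is what deferred_split_huge_page() passes to memcg_set_shrinker_bit() so that only cgroups with queued THPs get scanned; SHRINKER_NONSLAB marks it as tracking non-slab objects. A hedged sketch of the registration step (hugepage_init() in this file is assumed to do the equivalent; error handling trimmed):

    static int __init deferred_split_shrinker_init(void)
    {
            /*
             * For SHRINKER_MEMCG_AWARE shrinkers, register_shrinker()
             * also allocates deferred_split_shrinker.id, which the
             * memcg_set_shrinker_bit() call above relies on.
             */
            return register_shrinker(&deferred_split_shrinker);
    }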