diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ec2bb93..9237976 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -163,12 +163,17 @@ static struct shrinker huge_zero_page_shrinker = {
 static ssize_t enabled_show(struct kobject *kobj,
                            struct kobj_attribute *attr, char *buf)
 {
+       const char *output;
+
        if (test_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags))
-               return sprintf(buf, "[always] madvise never\n");
-       else if (test_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags))
-               return sprintf(buf, "always [madvise] never\n");
+               output = "[always] madvise never";
+       else if (test_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
+                         &transparent_hugepage_flags))
+               output = "always [madvise] never";
        else
-               return sprintf(buf, "always madvise [never]\n");
+               output = "always madvise [never]";
+
+       return sysfs_emit(buf, "%s\n", output);
 }
 
 static ssize_t enabled_store(struct kobject *kobj,
@@ -200,11 +205,11 @@ static struct kobj_attribute enabled_attr =
        __ATTR(enabled, 0644, enabled_show, enabled_store);
 
 ssize_t single_hugepage_flag_show(struct kobject *kobj,
-                               struct kobj_attribute *attr, char *buf,
-                               enum transparent_hugepage_flag flag)
+                                 struct kobj_attribute *attr, char *buf,
+                                 enum transparent_hugepage_flag flag)
 {
-       return sprintf(buf, "%d\n",
-                      !!test_bit(flag, &transparent_hugepage_flags));
+       return sysfs_emit(buf, "%d\n",
+                         !!test_bit(flag, &transparent_hugepage_flags));
 }
 
 ssize_t single_hugepage_flag_store(struct kobject *kobj,
@@ -232,15 +237,24 @@ ssize_t single_hugepage_flag_store(struct kobject *kobj,
 static ssize_t defrag_show(struct kobject *kobj,
                           struct kobj_attribute *attr, char *buf)
 {
-       if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-               return sprintf(buf, "[always] defer defer+madvise madvise never\n");
-       if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-               return sprintf(buf, "always [defer] defer+madvise madvise never\n");
-       if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-               return sprintf(buf, "always defer [defer+madvise] madvise never\n");
-       if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-               return sprintf(buf, "always defer defer+madvise [madvise] never\n");
-       return sprintf(buf, "always defer defer+madvise madvise [never]\n");
+       const char *output;
+
+       if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
+                    &transparent_hugepage_flags))
+               output = "[always] defer defer+madvise madvise never";
+       else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
+                         &transparent_hugepage_flags))
+               output = "always [defer] defer+madvise madvise never";
+       else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG,
+                         &transparent_hugepage_flags))
+               output = "always defer [defer+madvise] madvise never";
+       else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
+                         &transparent_hugepage_flags))
+               output = "always defer defer+madvise [madvise] never";
+       else
+               output = "always defer defer+madvise madvise [never]";
+
+       return sysfs_emit(buf, "%s\n", output);
 }
 
 static ssize_t defrag_store(struct kobject *kobj,
@@ -281,10 +295,10 @@ static struct kobj_attribute defrag_attr =
        __ATTR(defrag, 0644, defrag_show, defrag_store);
 
 static ssize_t use_zero_page_show(struct kobject *kobj,
-               struct kobj_attribute *attr, char *buf)
+                                 struct kobj_attribute *attr, char *buf)
 {
        return single_hugepage_flag_show(kobj, attr, buf,
-                               TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
+                                        TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
 }
 static ssize_t use_zero_page_store(struct kobject *kobj,
                struct kobj_attribute *attr, const char *buf, size_t count)
@@ -296,9 +310,9 @@ static struct kobj_attribute use_zero_page_attr =
        __ATTR(use_zero_page, 0644, use_zero_page_show, use_zero_page_store);
 
 static ssize_t hpage_pmd_size_show(struct kobject *kobj,
-               struct kobj_attribute *attr, char *buf)
+                                  struct kobj_attribute *attr, char *buf)
 {
-       return sprintf(buf, "%lu\n", HPAGE_PMD_SIZE);
+       return sysfs_emit(buf, "%lu\n", HPAGE_PMD_SIZE);
 }
 static struct kobj_attribute hpage_pmd_size_attr =
        __ATTR_RO(hpage_pmd_size);
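
The sprintf() -> sysfs_emit() conversions above all follow the same shape: sysfs_emit(), declared in <linux/sysfs.h>, knows the show() buffer is a full, page-aligned PAGE_SIZE buffer and clamps its output, so callbacks no longer format into a raw buffer with sprintf(). A minimal sketch of the pattern, outside this patch; the names example_val, example_show and example_attr are hypothetical:

#include <linux/kobject.h>
#include <linux/sysfs.h>

static int example_val;

/* hypothetical show callback; same shape as enabled_show()/hpage_pmd_size_show() */
static ssize_t example_show(struct kobject *kobj,
                            struct kobj_attribute *attr, char *buf)
{
        return sysfs_emit(buf, "%d\n", example_val);
}

static struct kobj_attribute example_attr = __ATTR_RO(example);

For callbacks that build the buffer in several steps, sysfs_emit_at() is the companion helper that appends at a given offset.
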
@@ -470,7 +484,7 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 #ifdef CONFIG_MEMCG
 static inline struct deferred_split *get_deferred_split_queue(struct page *page)
 {
-       struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+       struct mem_cgroup *memcg = page_memcg(compound_head(page));
        struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
 
        if (memcg)
@@ -2321,7 +2335,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 
 static void unmap_page(struct page *page)
 {
-       enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
+       enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK |
                TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
        bool unmap_success;
 
@@ -2345,6 +2359,27 @@ static void remap_page(struct page *page, unsigned int nr)
        }
 }
 
+static void lru_add_page_tail(struct page *head, struct page *tail,
+               struct lruvec *lruvec, struct list_head *list)
+{
+       VM_BUG_ON_PAGE(!PageHead(head), head);
+       VM_BUG_ON_PAGE(PageCompound(tail), head);
+       VM_BUG_ON_PAGE(PageLRU(tail), head);
+       lockdep_assert_held(&lruvec->lru_lock);
+
+       if (list) {
+               /* page reclaim is reclaiming a huge page */
+               VM_WARN_ON(PageLRU(head));
+               get_page(tail);
+               list_add_tail(&tail->lru, list);
+       } else {
+               /* head is still on lru (and we have it frozen) */
+               VM_WARN_ON(!PageLRU(head));
+               SetPageLRU(tail);
+               list_add_tail(&tail->lru, &head->lru);
+       }
+}
+
 static void __split_huge_page_tail(struct page *head, int tail,
                struct lruvec *lruvec, struct list_head *list)
 {
@@ -2356,7 +2391,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
         * Clone page flags before unfreezing refcount.
         *
         * After successful get_page_unless_zero() might follow flags change,
-        * for exmaple lock_page() which set PG_waiters.
+        * for example lock_page() which sets PG_waiters.
         */
        page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
        page_tail->flags |= (head->flags &
@@ -2411,18 +2446,15 @@ static void __split_huge_page_tail(struct page *head, int tail,
 }
 
 static void __split_huge_page(struct page *page, struct list_head *list,
-               pgoff_t end, unsigned long flags)
+               pgoff_t end)
 {
        struct page *head = compound_head(page);
-       pg_data_t *pgdat = page_pgdat(head);
        struct lruvec *lruvec;
        struct address_space *swap_cache = NULL;
        unsigned long offset = 0;
        unsigned int nr = thp_nr_pages(head);
        int i;
 
-       lruvec = mem_cgroup_page_lruvec(head, pgdat);
-
        /* complete memcg work before adding pages to LRU */
        mem_cgroup_split_huge_fixup(head);
 
@@ -2434,6 +2466,9 @@ static void __split_huge_page(struct page *page, struct list_head *list,
                xa_lock(&swap_cache->i_pages);
        }
 
+       /* lock lru list/PageCompound, ref frozen by page_ref_freeze */
+       lruvec = lock_page_lruvec(head);
+
        for (i = nr - 1; i >= 1; i--) {
                __split_huge_page_tail(head, i, lruvec, list);
                /* Some pages can be beyond i_size: drop them from page cache */
@@ -2453,6 +2488,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        }
 
        ClearPageCompound(head);
+       unlock_page_lruvec(lruvec);
+       /* Caller disabled irqs, so they are still disabled here */
 
        split_page_owner(head, nr);
 
@@ -2470,8 +2507,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
                page_ref_add(head, 2);
                xa_unlock(&head->mapping->i_pages);
        }
-
-       spin_unlock_irqrestore(&pgdat->lru_lock, flags);
+       local_irq_enable();
 
        remap_page(head, nr);
 
@@ -2617,12 +2653,10 @@ bool can_split_huge_page(struct page *page, int *pextra_pins)
 int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
        struct page *head = compound_head(page);
-       struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
        struct deferred_split *ds_queue = get_deferred_split_queue(head);
        struct anon_vma *anon_vma = NULL;
        struct address_space *mapping = NULL;
        int count, mapcount, extra_pins, ret;
-       unsigned long flags;
        pgoff_t end;
 
        VM_BUG_ON_PAGE(is_huge_zero_page(head), head);
@@ -2683,9 +2717,8 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
        unmap_page(head);
        VM_BUG_ON_PAGE(compound_mapcount(head), head);
 
-       /* prevent PageLRU to go away from under us, and freeze lru stats */
-       spin_lock_irqsave(&pgdata->lru_lock, flags);
-
+       /* block interrupt reentry in xa_lock and spinlock */
+       local_irq_disable();
        if (mapping) {
                XA_STATE(xas, &mapping->i_pages, page_index(head));
 
@@ -2710,12 +2743,12 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                spin_unlock(&ds_queue->split_queue_lock);
                if (mapping) {
                        if (PageSwapBacked(head))
-                               __dec_node_page_state(head, NR_SHMEM_THPS);
+                               __dec_lruvec_page_state(head, NR_SHMEM_THPS);
                        else
-                               __dec_node_page_state(head, NR_FILE_THPS);
+                               __dec_lruvec_page_state(head, NR_FILE_THPS);
                }
 
-               __split_huge_page(page, list, end, flags);
+               __split_huge_page(page, list, end);
                ret = 0;
        } else {
                if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
@@ -2729,7 +2762,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                spin_unlock(&ds_queue->split_queue_lock);
 fail:          if (mapping)
                        xa_unlock(&mapping->i_pages);
-               spin_unlock_irqrestore(&pgdata->lru_lock, flags);
+               local_irq_enable();
                remap_page(head, thp_nr_pages(head));
                ret = -EBUSY;
        }
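
The lock ordering above replaces the node-wide pgdat->lru_lock, previously taken with spin_lock_irqsave(), with local_irq_disable() followed by the xarray lock, the split_queue_lock, and the per-lruvec lock taken via lock_page_lruvec() inside __split_huge_page(). A rough sketch of the new per-lruvec pattern, assuming nothing beyond what the hunks show; example_with_lru_lock() is a hypothetical helper:

#include <linux/memcontrol.h>
#include <linux/mm.h>

/* hypothetical helper: mirror the ordering split_huge_page_to_list() and
 * __split_huge_page() use above: block interrupt re-entry first, then take
 * the per-lruvec lock for this page before touching its LRU state.
 */
static void example_with_lru_lock(struct page *page)
{
        struct lruvec *lruvec;

        local_irq_disable();
        lruvec = lock_page_lruvec(page);        /* spins on lruvec->lru_lock */

        /* ... manipulate page->lru / PageLRU under the lock ... */

        unlock_page_lruvec(lruvec);
        local_irq_enable();
}

Where a caller does not need interrupts disabled around other work, the _irq variants of these helpers (lock_page_lruvec_irq()/unlock_page_lruvec_irq()) fold the two steps together.
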
@@ -2764,7 +2797,7 @@ void deferred_split_huge_page(struct page *page)
 {
        struct deferred_split *ds_queue = get_deferred_split_queue(page);
 #ifdef CONFIG_MEMCG
-       struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+       struct mem_cgroup *memcg = page_memcg(compound_head(page));
 #endif
        unsigned long flags;
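
Both hunks that replaced compound_head(page)->mem_cgroup read the memcg through page_memcg() instead, which hides how the association is stored in struct page. A minimal sketch of using the accessor, assuming only what the hunks show (page_memcg() returns the owning struct mem_cgroup * or NULL); example_page_is_charged() is a hypothetical name:

#include <linux/memcontrol.h>
#include <linux/mm.h>

/* hypothetical helper: true if the (possibly compound) page is charged to a
 * memcg, using the accessor rather than a raw struct page field.
 */
static bool example_page_is_charged(struct page *page)
{
        return page_memcg(compound_head(page)) != NULL;
}
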