mm/lru: replace pgdat lru_lock with lruvec lock
author Alex Shi <alex.shi@linux.alibaba.com>
Tue, 15 Dec 2020 20:34:29 +0000 (12:34 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 15 Dec 2020 22:48:04 +0000 (14:48 -0800)
This patch moves the per-node lru_lock into the lruvec, thus providing
one lru_lock per memcg per node.  So on a large machine, each memcg no
longer has to suffer from contention on the shared per-node
pgdat->lru_lock; it can go fast with its own lru_lock.
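
The structural change is small; condensed from the include/linux/mmzone.h
hunk below, the lock simply moves from pglist_data into each lruvec
(a sketch, with unrelated fields elided):

  struct lruvec {
          struct list_head        lists[NR_LRU_LISTS];
          /* per lruvec lru_lock for memcg */
          spinlock_t              lru_lock;
          /* ... reclaim cost accounting etc. elided ... */
  };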

Since memcg charging was moved to before LRU insertion, page isolation
can now serialize a page's memcg, so the per-memcg lruvec lock is
stable and can replace the per-node lru lock.
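
For illustration, the resulting isolation pattern, condensed from the
mm/vmscan.c isolate_lru_page() hunk below, is: clear PageLRU first,
which serializes the page's memcg, then take the per-memcg lock
(a sketch, error handling elided):

  if (TestClearPageLRU(page)) {
          struct lruvec *lruvec;

          get_page(page);
          /* page's memcg is stable now, so the lruvec cannot change */
          lruvec = lock_page_lruvec_irq(page);
          del_page_from_lru_list(page, lruvec, page_lru(page));
          unlock_page_lruvec_irq(lruvec);
  }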

In isolate_migratepages_block(), compact_unlock_should_abort() and
lock_page_lruvec_irqsave() are open coded to work with compact_control.
A debug function, lruvec_memcg_debug(), is also added to the locking
paths; it may give some clues if something gets out of hand.
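
Batched walkers (the pagevec paths in mm/swap.c, mm/mlock.c and
mm/vmscan.c) now relock only when consecutive pages belong to different
lruvecs; a sketch of the shared pattern, condensed from the
__pagevec_lru_add() hunk below:

  struct lruvec *lruvec = NULL;
  unsigned long flags = 0;

  for (i = 0; i < pagevec_count(pvec); i++) {
          struct page *page = pvec->pages[i];
          struct lruvec *new_lruvec;

          new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
          if (lruvec != new_lruvec) {
                  if (lruvec)
                          unlock_page_lruvec_irqrestore(lruvec, flags);
                  lruvec = lock_page_lruvec_irqsave(page, &flags);
          }
          /* ... operate on the page under lruvec->lru_lock ... */
  }
  if (lruvec)
          unlock_page_lruvec_irqrestore(lruvec, flags);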

Daniel Jordan's testing shows a 62% improvement on a modified readtwice
case on his 2P * 10 core * 2 HT Broadwell box.
https://lore.kernel.org/lkml/20200915165807.kpp7uhiw7l3loofu@ca-dmjordan1.us.oracle.com/

Hugh Dickins helped polish the patch, thanks!

[alex.shi@linux.alibaba.com: fix comment typo]
Link: https://lkml.kernel.org/r/5b085715-292a-4b43-50b3-d73dc90d1de5@linux.alibaba.com
[alex.shi@linux.alibaba.com: use page_memcg()]
Link: https://lkml.kernel.org/r/5a4c2b72-7ee8-2478-fc0e-85eb83aafec4@linux.alibaba.com
Link: https://lkml.kernel.org/r/1604566549-62481-18-git-send-email-alex.shi@linux.alibaba.com
Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
Acked-by: Hugh Dickins <hughd@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rong Chen <rong.a.chen@intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Yang Shi <yang.shi@linux.alibaba.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Alexander Duyck <alexander.duyck@gmail.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mika Penttilä <mika.penttila@nextfour.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/memcontrol.h
include/linux/mmzone.h
mm/compaction.c
mm/huge_memory.c
mm/memcontrol.c
mm/mlock.c
mm/mmzone.c
mm/page_alloc.c
mm/swap.c
mm/vmscan.c

index f530d63..aa5d559 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -491,6 +491,19 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
 
 struct mem_cgroup *get_mem_cgroup_from_page(struct page *page);
 
+struct lruvec *lock_page_lruvec(struct page *page);
+struct lruvec *lock_page_lruvec_irq(struct page *page);
+struct lruvec *lock_page_lruvec_irqsave(struct page *page,
+                                               unsigned long *flags);
+
+#ifdef CONFIG_DEBUG_VM
+void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page);
+#else
+static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+{
+}
+#endif
+
 static inline
 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
        return css ? container_of(css, struct mem_cgroup, css) : NULL;
@@ -996,6 +1009,31 @@ static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
 }
 
+static inline struct lruvec *lock_page_lruvec(struct page *page)
+{
+       struct pglist_data *pgdat = page_pgdat(page);
+
+       spin_lock(&pgdat->__lruvec.lru_lock);
+       return &pgdat->__lruvec;
+}
+
+static inline struct lruvec *lock_page_lruvec_irq(struct page *page)
+{
+       struct pglist_data *pgdat = page_pgdat(page);
+
+       spin_lock_irq(&pgdat->__lruvec.lru_lock);
+       return &pgdat->__lruvec;
+}
+
+static inline struct lruvec *lock_page_lruvec_irqsave(struct page *page,
+               unsigned long *flagsp)
+{
+       struct pglist_data *pgdat = page_pgdat(page);
+
+       spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp);
+       return &pgdat->__lruvec;
+}
+
 static inline struct mem_cgroup *
 mem_cgroup_iter(struct mem_cgroup *root,
                struct mem_cgroup *prev,
@@ -1215,6 +1253,10 @@ static inline
 void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
 {
 }
+
+static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+{
+}
 #endif /* CONFIG_MEMCG */
 
 /* idx can be of type enum memcg_stat_item or node_stat_item */
@@ -1296,6 +1338,22 @@ static inline struct lruvec *parent_lruvec(struct lruvec *lruvec)
        return mem_cgroup_lruvec(memcg, lruvec_pgdat(lruvec));
 }
 
+static inline void unlock_page_lruvec(struct lruvec *lruvec)
+{
+       spin_unlock(&lruvec->lru_lock);
+}
+
+static inline void unlock_page_lruvec_irq(struct lruvec *lruvec)
+{
+       spin_unlock_irq(&lruvec->lru_lock);
+}
+
+static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
+               unsigned long flags)
+{
+       spin_unlock_irqrestore(&lruvec->lru_lock, flags);
+}
+
 #ifdef CONFIG_CGROUP_WRITEBACK
 
 struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
index 98a80c0..9da23c0 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -276,6 +276,8 @@ enum lruvec_flags {
 
 struct lruvec {
        struct list_head                lists[NR_LRU_LISTS];
+       /* per lruvec lru_lock for memcg */
+       spinlock_t                      lru_lock;
        /*
         * These track the cost of reclaiming one LRU - file or anon -
         * over the other. As the observed cost of reclaiming one LRU
@@ -782,7 +784,6 @@ typedef struct pglist_data {
 
        /* Write-intensive fields used by page reclaim */
        ZONE_PADDING(_pad1_)
-       spinlock_t              lru_lock;
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
        /*
index 50938e6..e5acb97 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -804,7 +804,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
        unsigned long nr_scanned = 0, nr_isolated = 0;
        struct lruvec *lruvec;
        unsigned long flags = 0;
-       bool locked = false;
+       struct lruvec *locked = NULL;
        struct page *page = NULL, *valid_page = NULL;
        unsigned long start_pfn = low_pfn;
        bool skip_on_failure = false;
@@ -868,11 +868,20 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                 * contention, to give chance to IRQs. Abort completely if
                 * a fatal signal is pending.
                 */
-               if (!(low_pfn % SWAP_CLUSTER_MAX)
-                   && compact_unlock_should_abort(&pgdat->lru_lock,
-                                           flags, &locked, cc)) {
-                       low_pfn = 0;
-                       goto fatal_pending;
+               if (!(low_pfn % SWAP_CLUSTER_MAX)) {
+                       if (locked) {
+                               unlock_page_lruvec_irqrestore(locked, flags);
+                               locked = NULL;
+                       }
+
+                       if (fatal_signal_pending(current)) {
+                               cc->contended = true;
+
+                               low_pfn = 0;
+                               goto fatal_pending;
+                       }
+
+                       cond_resched();
                }
 
                if (!pfn_valid_within(low_pfn))
@@ -944,9 +953,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                        if (unlikely(__PageMovable(page)) &&
                                        !PageIsolated(page)) {
                                if (locked) {
-                                       spin_unlock_irqrestore(&pgdat->lru_lock,
-                                                                       flags);
-                                       locked = false;
+                                       unlock_page_lruvec_irqrestore(locked, flags);
+                                       locked = NULL;
                                }
 
                                if (!isolate_movable_page(page, isolate_mode))
@@ -987,10 +995,19 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                if (!TestClearPageLRU(page))
                        goto isolate_fail_put;
 
+               rcu_read_lock();
+               lruvec = mem_cgroup_page_lruvec(page, pgdat);
+
                /* If we already hold the lock, we can skip some rechecking */
-               if (!locked) {
-                       locked = compact_lock_irqsave(&pgdat->lru_lock,
-                                                               &flags, cc);
+               if (lruvec != locked) {
+                       if (locked)
+                               unlock_page_lruvec_irqrestore(locked, flags);
+
+                       compact_lock_irqsave(&lruvec->lru_lock, &flags, cc);
+                       locked = lruvec;
+                       rcu_read_unlock();
+
+                       lruvec_memcg_debug(lruvec, page);
 
                        /* Try get exclusive access under lock */
                        if (!skip_updated) {
@@ -1009,9 +1026,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                                SetPageLRU(page);
                                goto isolate_fail_put;
                        }
-               }
-
-               lruvec = mem_cgroup_page_lruvec(page, pgdat);
+               } else
+                       rcu_read_unlock();
 
                /* The whole page is taken off the LRU; skip the tail pages. */
                if (PageCompound(page))
@@ -1045,8 +1061,8 @@ isolate_success:
 isolate_fail_put:
                /* Avoid potential deadlock in freeing page under lru_lock */
                if (locked) {
-                       spin_unlock_irqrestore(&pgdat->lru_lock, flags);
-                       locked = false;
+                       unlock_page_lruvec_irqrestore(locked, flags);
+                       locked = NULL;
                }
                put_page(page);
 
@@ -1061,8 +1077,8 @@ isolate_fail:
                 */
                if (nr_isolated) {
                        if (locked) {
-                               spin_unlock_irqrestore(&pgdat->lru_lock, flags);
-                               locked = false;
+                               unlock_page_lruvec_irqrestore(locked, flags);
+                               locked = NULL;
                        }
                        putback_movable_pages(&cc->migratepages);
                        cc->nr_migratepages = 0;
@@ -1090,7 +1106,7 @@ isolate_fail:
 
 isolate_abort:
        if (locked)
-               spin_unlock_irqrestore(&pgdat->lru_lock, flags);
+               unlock_page_lruvec_irqrestore(locked, flags);
        if (page) {
                SetPageLRU(page);
                put_page(page);
index a59333a..3c4a8fc 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2365,7 +2365,7 @@ static void lru_add_page_tail(struct page *head, struct page *tail,
        VM_BUG_ON_PAGE(!PageHead(head), head);
        VM_BUG_ON_PAGE(PageCompound(tail), head);
        VM_BUG_ON_PAGE(PageLRU(tail), head);
-       lockdep_assert_held(&lruvec_pgdat(lruvec)->lru_lock);
+       lockdep_assert_held(&lruvec->lru_lock);
 
        if (list) {
                /* page reclaim is reclaiming a huge page */
@@ -2449,7 +2449,6 @@ static void __split_huge_page(struct page *page, struct list_head *list,
                pgoff_t end)
 {
        struct page *head = compound_head(page);
-       pg_data_t *pgdat = page_pgdat(head);
        struct lruvec *lruvec;
        struct address_space *swap_cache = NULL;
        unsigned long offset = 0;
@@ -2467,10 +2466,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
                xa_lock(&swap_cache->i_pages);
        }
 
-       /* prevent PageLRU to go away from under us, and freeze lru stats */
-       spin_lock(&pgdat->lru_lock);
-
-       lruvec = mem_cgroup_page_lruvec(head, pgdat);
+       /* lock lru list/PageCompound, ref frozen by page_ref_freeze */
+       lruvec = lock_page_lruvec(head);
 
        for (i = nr - 1; i >= 1; i--) {
                __split_huge_page_tail(head, i, lruvec, list);
@@ -2491,7 +2488,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        }
 
        ClearPageCompound(head);
-       spin_unlock(&pgdat->lru_lock);
+       unlock_page_lruvec(lruvec);
        /* Caller disabled irqs, so they are still disabled here */
 
        split_page_owner(head, nr);
index 827879d..2f7824d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -20,6 +20,9 @@
  * Lockless page tracking & accounting
  * Unified hierarchy configuration model
  * Copyright (C) 2015 Red Hat, Inc., Johannes Weiner
+ *
+ * Per memcg lru locking
+ * Copyright (C) 2020 Alibaba, Inc, Alex Shi
  */
 
 #include <linux/page_counter.h>
@@ -1330,6 +1333,23 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
        return ret;
 }
 
+#ifdef CONFIG_DEBUG_VM
+void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+{
+       struct mem_cgroup *memcg;
+
+       if (mem_cgroup_disabled())
+               return;
+
+       memcg = page_memcg(page);
+
+       if (!memcg)
+               VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != root_mem_cgroup, page);
+       else
+               VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != memcg, page);
+}
+#endif
+
 /**
  * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
  * @page: the page
@@ -1370,6 +1390,60 @@ out:
        return lruvec;
 }
 
+/**
+ * lock_page_lruvec - lock and return lruvec for a given page.
+ * @page: the page
+ *
+ * These functions should only be used under one of these conditions:
+ * - PageLRU is cleared,
+ * - or page->_refcount is zero,
+ * - or the page is locked.
+ */
+struct lruvec *lock_page_lruvec(struct page *page)
+{
+       struct lruvec *lruvec;
+       struct pglist_data *pgdat = page_pgdat(page);
+
+       rcu_read_lock();
+       lruvec = mem_cgroup_page_lruvec(page, pgdat);
+       spin_lock(&lruvec->lru_lock);
+       rcu_read_unlock();
+
+       lruvec_memcg_debug(lruvec, page);
+
+       return lruvec;
+}
+
+struct lruvec *lock_page_lruvec_irq(struct page *page)
+{
+       struct lruvec *lruvec;
+       struct pglist_data *pgdat = page_pgdat(page);
+
+       rcu_read_lock();
+       lruvec = mem_cgroup_page_lruvec(page, pgdat);
+       spin_lock_irq(&lruvec->lru_lock);
+       rcu_read_unlock();
+
+       lruvec_memcg_debug(lruvec, page);
+
+       return lruvec;
+}
+
+struct lruvec *lock_page_lruvec_irqsave(struct page *page, unsigned long *flags)
+{
+       struct lruvec *lruvec;
+       struct pglist_data *pgdat = page_pgdat(page);
+
+       rcu_read_lock();
+       lruvec = mem_cgroup_page_lruvec(page, pgdat);
+       spin_lock_irqsave(&lruvec->lru_lock, *flags);
+       rcu_read_unlock();
+
+       lruvec_memcg_debug(lruvec, page);
+
+       return lruvec;
+}
+
 /**
  * mem_cgroup_update_lru_size - account for adding or removing an lru page
  * @lruvec: mem_cgroup per zone lru vector
@@ -3281,10 +3355,8 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
 #endif /* CONFIG_MEMCG_KMEM */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-
 /*
- * Because tail pages are not marked as "used", set it. We're under
- * pgdat->lru_lock and migration entries setup in all page mappings.
+ * Because page_memcg(head) is not set on compound tails, set it now.
  */
 void mem_cgroup_split_huge_fixup(struct page *head)
 {
index 7b0e633..ab164a6 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -262,12 +262,12 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
        int nr = pagevec_count(pvec);
        int delta_munlocked = -nr;
        struct pagevec pvec_putback;
+       struct lruvec *lruvec = NULL;
        int pgrescued = 0;
 
        pagevec_init(&pvec_putback);
 
        /* Phase 1: page isolation */
-       spin_lock_irq(&zone->zone_pgdat->lru_lock);
        for (i = 0; i < nr; i++) {
                struct page *page = pvec->pages[i];
 
@@ -277,10 +277,16 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
                         * so we can spare the get_page() here.
                         */
                        if (TestClearPageLRU(page)) {
-                               struct lruvec *lruvec;
+                               struct lruvec *new_lruvec;
+
+                               new_lruvec = mem_cgroup_page_lruvec(page,
+                                               page_pgdat(page));
+                               if (new_lruvec != lruvec) {
+                                       if (lruvec)
+                                               unlock_page_lruvec_irq(lruvec);
+                                       lruvec = lock_page_lruvec_irq(page);
+                               }
 
-                               lruvec = mem_cgroup_page_lruvec(page,
-                                                       page_pgdat(page));
                                del_page_from_lru_list(page, lruvec,
                                                        page_lru(page));
                                continue;
@@ -299,8 +305,12 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
                pagevec_add(&pvec_putback, pvec->pages[i]);
                pvec->pages[i] = NULL;
        }
-       __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
-       spin_unlock_irq(&zone->zone_pgdat->lru_lock);
+       if (lruvec) {
+               __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
+               unlock_page_lruvec_irq(lruvec);
+       } else if (delta_munlocked) {
+               mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
+       }
 
        /* Now we can release pins of pages that we are not munlocking */
        pagevec_release(&pvec_putback);
index f337831..eb89d6e 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -77,6 +77,7 @@ void lruvec_init(struct lruvec *lruvec)
        enum lru_list lru;
 
        memset(lruvec, 0, sizeof(struct lruvec));
+       spin_lock_init(&lruvec->lru_lock);
 
        for_each_lru(lru)
                INIT_LIST_HEAD(&lruvec->lists[lru]);
index b632945..b1cc2b7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6870,7 +6870,6 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
        init_waitqueue_head(&pgdat->pfmemalloc_wait);
 
        pgdat_page_ext_init(pgdat);
-       spin_lock_init(&pgdat->lru_lock);
        lruvec_init(&pgdat->__lruvec);
 }
 
index d952af7..ba9fc21 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -79,16 +79,14 @@ static DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = {
 static void __page_cache_release(struct page *page)
 {
        if (PageLRU(page)) {
-               pg_data_t *pgdat = page_pgdat(page);
                struct lruvec *lruvec;
                unsigned long flags;
 
-               spin_lock_irqsave(&pgdat->lru_lock, flags);
-               lruvec = mem_cgroup_page_lruvec(page, pgdat);
+               lruvec = lock_page_lruvec_irqsave(page, &flags);
                VM_BUG_ON_PAGE(!PageLRU(page), page);
                __ClearPageLRU(page);
                del_page_from_lru_list(page, lruvec, page_off_lru(page));
-               spin_unlock_irqrestore(&pgdat->lru_lock, flags);
+               unlock_page_lruvec_irqrestore(lruvec, flags);
        }
        __ClearPageWaiters(page);
 }
@@ -207,32 +205,30 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
        void (*move_fn)(struct page *page, struct lruvec *lruvec))
 {
        int i;
-       struct pglist_data *pgdat = NULL;
-       struct lruvec *lruvec;
+       struct lruvec *lruvec = NULL;
        unsigned long flags = 0;
 
        for (i = 0; i < pagevec_count(pvec); i++) {
                struct page *page = pvec->pages[i];
-               struct pglist_data *pagepgdat = page_pgdat(page);
-
-               if (pagepgdat != pgdat) {
-                       if (pgdat)
-                               spin_unlock_irqrestore(&pgdat->lru_lock, flags);
-                       pgdat = pagepgdat;
-                       spin_lock_irqsave(&pgdat->lru_lock, flags);
-               }
+               struct lruvec *new_lruvec;
 
                /* block memcg migration during page moving between lru */
                if (!TestClearPageLRU(page))
                        continue;
 
-               lruvec = mem_cgroup_page_lruvec(page, pgdat);
+               new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
+               if (lruvec != new_lruvec) {
+                       if (lruvec)
+                               unlock_page_lruvec_irqrestore(lruvec, flags);
+                       lruvec = lock_page_lruvec_irqsave(page, &flags);
+               }
+
                (*move_fn)(page, lruvec);
 
                SetPageLRU(page);
        }
-       if (pgdat)
-               spin_unlock_irqrestore(&pgdat->lru_lock, flags);
+       if (lruvec)
+               unlock_page_lruvec_irqrestore(lruvec, flags);
        release_pages(pvec->pages, pvec->nr);
        pagevec_reinit(pvec);
 }
@@ -274,9 +270,15 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages)
 {
        do {
                unsigned long lrusize;
-               struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 
-               spin_lock_irq(&pgdat->lru_lock);
+               /*
+                * Holding lruvec->lru_lock is safe here, because we come
+                * either 1) from reclaim, which holds a pin on the lruvec, or
+                * 2) from a pre-LRU page during refault (which also holds the
+                *    rcu lock, so it would be safe even if the page was on the
+                *    LRU and could move simultaneously to a new lruvec).
+                */
+               spin_lock_irq(&lruvec->lru_lock);
                /* Record cost event */
                if (file)
                        lruvec->file_cost += nr_pages;
@@ -300,7 +302,7 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages)
                        lruvec->file_cost /= 2;
                        lruvec->anon_cost /= 2;
                }
-               spin_unlock_irq(&pgdat->lru_lock);
+               spin_unlock_irq(&lruvec->lru_lock);
        } while ((lruvec = parent_lruvec(lruvec)));
 }
 
@@ -364,13 +366,15 @@ static inline void activate_page_drain(int cpu)
 
 static void activate_page(struct page *page)
 {
-       pg_data_t *pgdat = page_pgdat(page);
+       struct lruvec *lruvec;
 
        page = compound_head(page);
-       spin_lock_irq(&pgdat->lru_lock);
-       if (PageLRU(page))
-               __activate_page(page, mem_cgroup_page_lruvec(page, pgdat));
-       spin_unlock_irq(&pgdat->lru_lock);
+       if (TestClearPageLRU(page)) {
+               lruvec = lock_page_lruvec_irq(page);
+               __activate_page(page, lruvec);
+               unlock_page_lruvec_irq(lruvec);
+               SetPageLRU(page);
+       }
 }
 #endif
 
@@ -860,8 +864,7 @@ void release_pages(struct page **pages, int nr)
 {
        int i;
        LIST_HEAD(pages_to_free);
-       struct pglist_data *locked_pgdat = NULL;
-       struct lruvec *lruvec;
+       struct lruvec *lruvec = NULL;
        unsigned long flags;
        unsigned int lock_batch;
 
@@ -871,11 +874,11 @@ void release_pages(struct page **pages, int nr)
                /*
                 * Make sure the IRQ-safe lock-holding time does not get
                 * excessive with a continuous string of pages from the
-                * same pgdat. The lock is held only if pgdat != NULL.
+                * same lruvec. The lock is held only if lruvec != NULL.
                 */
-               if (locked_pgdat && ++lock_batch == SWAP_CLUSTER_MAX) {
-                       spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
-                       locked_pgdat = NULL;
+               if (lruvec && ++lock_batch == SWAP_CLUSTER_MAX) {
+                       unlock_page_lruvec_irqrestore(lruvec, flags);
+                       lruvec = NULL;
                }
 
                page = compound_head(page);
@@ -883,10 +886,9 @@ void release_pages(struct page **pages, int nr)
                        continue;
 
                if (is_zone_device_page(page)) {
-                       if (locked_pgdat) {
-                               spin_unlock_irqrestore(&locked_pgdat->lru_lock,
-                                                      flags);
-                               locked_pgdat = NULL;
+                       if (lruvec) {
+                               unlock_page_lruvec_irqrestore(lruvec, flags);
+                               lruvec = NULL;
                        }
                        /*
                         * ZONE_DEVICE pages that return 'false' from
@@ -907,27 +909,27 @@ void release_pages(struct page **pages, int nr)
                        continue;
 
                if (PageCompound(page)) {
-                       if (locked_pgdat) {
-                               spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
-                               locked_pgdat = NULL;
+                       if (lruvec) {
+                               unlock_page_lruvec_irqrestore(lruvec, flags);
+                               lruvec = NULL;
                        }
                        __put_compound_page(page);
                        continue;
                }
 
                if (PageLRU(page)) {
-                       struct pglist_data *pgdat = page_pgdat(page);
+                       struct lruvec *new_lruvec;
 
-                       if (pgdat != locked_pgdat) {
-                               if (locked_pgdat)
-                                       spin_unlock_irqrestore(&locked_pgdat->lru_lock,
+                       new_lruvec = mem_cgroup_page_lruvec(page,
+                                                       page_pgdat(page));
+                       if (new_lruvec != lruvec) {
+                               if (lruvec)
+                                       unlock_page_lruvec_irqrestore(lruvec,
                                                                        flags);
                                lock_batch = 0;
-                               locked_pgdat = pgdat;
-                               spin_lock_irqsave(&locked_pgdat->lru_lock, flags);
+                               lruvec = lock_page_lruvec_irqsave(page, &flags);
                        }
 
-                       lruvec = mem_cgroup_page_lruvec(page, locked_pgdat);
                        VM_BUG_ON_PAGE(!PageLRU(page), page);
                        __ClearPageLRU(page);
                        del_page_from_lru_list(page, lruvec, page_off_lru(page));
@@ -937,8 +939,8 @@ void release_pages(struct page **pages, int nr)
 
                list_add(&page->lru, &pages_to_free);
        }
-       if (locked_pgdat)
-               spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
+       if (lruvec)
+               unlock_page_lruvec_irqrestore(lruvec, flags);
 
        mem_cgroup_uncharge_list(&pages_to_free);
        free_unref_page_list(&pages_to_free);
@@ -1026,26 +1028,24 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec)
 void __pagevec_lru_add(struct pagevec *pvec)
 {
        int i;
-       struct pglist_data *pgdat = NULL;
-       struct lruvec *lruvec;
+       struct lruvec *lruvec = NULL;
        unsigned long flags = 0;
 
        for (i = 0; i < pagevec_count(pvec); i++) {
                struct page *page = pvec->pages[i];
-               struct pglist_data *pagepgdat = page_pgdat(page);
+               struct lruvec *new_lruvec;
 
-               if (pagepgdat != pgdat) {
-                       if (pgdat)
-                               spin_unlock_irqrestore(&pgdat->lru_lock, flags);
-                       pgdat = pagepgdat;
-                       spin_lock_irqsave(&pgdat->lru_lock, flags);
+               new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
+               if (lruvec != new_lruvec) {
+                       if (lruvec)
+                               unlock_page_lruvec_irqrestore(lruvec, flags);
+                       lruvec = lock_page_lruvec_irqsave(page, &flags);
                }
 
-               lruvec = mem_cgroup_page_lruvec(page, pgdat);
                __pagevec_lru_add_fn(page, lruvec);
        }
-       if (pgdat)
-               spin_unlock_irqrestore(&pgdat->lru_lock, flags);
+       if (lruvec)
+               unlock_page_lruvec_irqrestore(lruvec, flags);
        release_pages(pvec->pages, pvec->nr);
        pagevec_reinit(pvec);
 }
index cf99e66..b27b5db 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1764,14 +1764,12 @@ int isolate_lru_page(struct page *page)
        WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");
 
        if (TestClearPageLRU(page)) {
-               pg_data_t *pgdat = page_pgdat(page);
                struct lruvec *lruvec;
 
                get_page(page);
-               lruvec = mem_cgroup_page_lruvec(page, pgdat);
-               spin_lock_irq(&pgdat->lru_lock);
+               lruvec = lock_page_lruvec_irq(page);
                del_page_from_lru_list(page, lruvec, page_lru(page));
-               spin_unlock_irq(&pgdat->lru_lock);
+               unlock_page_lruvec_irq(lruvec);
                ret = 0;
        }
 
@@ -1838,7 +1836,6 @@ static int too_many_isolated(struct pglist_data *pgdat, int file,
 static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
                                                     struct list_head *list)
 {
-       struct pglist_data *pgdat = lruvec_pgdat(lruvec);
        int nr_pages, nr_moved = 0;
        LIST_HEAD(pages_to_free);
        struct page *page;
@@ -1849,9 +1846,9 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
                VM_BUG_ON_PAGE(PageLRU(page), page);
                list_del(&page->lru);
                if (unlikely(!page_evictable(page))) {
-                       spin_unlock_irq(&pgdat->lru_lock);
+                       spin_unlock_irq(&lruvec->lru_lock);
                        putback_lru_page(page);
-                       spin_lock_irq(&pgdat->lru_lock);
+                       spin_lock_irq(&lruvec->lru_lock);
                        continue;
                }
 
@@ -1873,9 +1870,9 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
                        __ClearPageActive(page);
 
                        if (unlikely(PageCompound(page))) {
-                               spin_unlock_irq(&pgdat->lru_lock);
+                               spin_unlock_irq(&lruvec->lru_lock);
                                destroy_compound_page(page);
-                               spin_lock_irq(&pgdat->lru_lock);
+                               spin_lock_irq(&lruvec->lru_lock);
                        } else
                                list_add(&page->lru, &pages_to_free);
 
@@ -1952,7 +1949,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 
        lru_add_drain();
 
-       spin_lock_irq(&pgdat->lru_lock);
+       spin_lock_irq(&lruvec->lru_lock);
 
        nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
                                     &nr_scanned, sc, lru);
@@ -1964,14 +1961,14 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
        __count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned);
        __count_vm_events(PGSCAN_ANON + file, nr_scanned);
 
-       spin_unlock_irq(&pgdat->lru_lock);
+       spin_unlock_irq(&lruvec->lru_lock);
 
        if (nr_taken == 0)
                return 0;
 
        nr_reclaimed = shrink_page_list(&page_list, pgdat, sc, &stat, false);
 
-       spin_lock_irq(&pgdat->lru_lock);
+       spin_lock_irq(&lruvec->lru_lock);
        move_pages_to_lru(lruvec, &page_list);
 
        __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
@@ -1980,7 +1977,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
                __count_vm_events(item, nr_reclaimed);
        __count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed);
        __count_vm_events(PGSTEAL_ANON + file, nr_reclaimed);
-       spin_unlock_irq(&pgdat->lru_lock);
+       spin_unlock_irq(&lruvec->lru_lock);
 
        lru_note_cost(lruvec, file, stat.nr_pageout);
        mem_cgroup_uncharge_list(&page_list);
@@ -2033,7 +2030,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
 
        lru_add_drain();
 
-       spin_lock_irq(&pgdat->lru_lock);
+       spin_lock_irq(&lruvec->lru_lock);
 
        nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
                                     &nr_scanned, sc, lru);
@@ -2044,7 +2041,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
                __count_vm_events(PGREFILL, nr_scanned);
        __count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned);
 
-       spin_unlock_irq(&pgdat->lru_lock);
+       spin_unlock_irq(&lruvec->lru_lock);
 
        while (!list_empty(&l_hold)) {
                cond_resched();
@@ -2090,7 +2087,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
        /*
         * Move pages back to the lru list.
         */
-       spin_lock_irq(&pgdat->lru_lock);
+       spin_lock_irq(&lruvec->lru_lock);
 
        nr_activate = move_pages_to_lru(lruvec, &l_active);
        nr_deactivate = move_pages_to_lru(lruvec, &l_inactive);
@@ -2101,7 +2098,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
        __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_deactivate);
 
        __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
-       spin_unlock_irq(&pgdat->lru_lock);
+       spin_unlock_irq(&lruvec->lru_lock);
 
        mem_cgroup_uncharge_list(&l_active);
        free_unref_page_list(&l_active);
@@ -2689,10 +2686,10 @@ again:
        /*
         * Determine the scan balance between anon and file LRUs.
         */
-       spin_lock_irq(&pgdat->lru_lock);
+       spin_lock_irq(&target_lruvec->lru_lock);
        sc->anon_cost = target_lruvec->anon_cost;
        sc->file_cost = target_lruvec->file_cost;
-       spin_unlock_irq(&pgdat->lru_lock);
+       spin_unlock_irq(&target_lruvec->lru_lock);
 
        /*
         * Target desirable inactive:active list ratios for the anon
@@ -4268,16 +4265,15 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
  */
 void check_move_unevictable_pages(struct pagevec *pvec)
 {
-       struct lruvec *lruvec;
-       struct pglist_data *pgdat = NULL;
+       struct lruvec *lruvec = NULL;
        int pgscanned = 0;
        int pgrescued = 0;
        int i;
 
        for (i = 0; i < pvec->nr; i++) {
                struct page *page = pvec->pages[i];
-               struct pglist_data *pagepgdat = page_pgdat(page);
                int nr_pages;
+               struct lruvec *new_lruvec;
 
                if (PageTransTail(page))
                        continue;
@@ -4289,13 +4285,12 @@ void check_move_unevictable_pages(struct pagevec *pvec)
                if (!TestClearPageLRU(page))
                        continue;
 
-               if (pagepgdat != pgdat) {
-                       if (pgdat)
-                               spin_unlock_irq(&pgdat->lru_lock);
-                       pgdat = pagepgdat;
-                       spin_lock_irq(&pgdat->lru_lock);
+               new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
+               if (lruvec != new_lruvec) {
+                       if (lruvec)
+                               unlock_page_lruvec_irq(lruvec);
+                       lruvec = lock_page_lruvec_irq(page);
                }
-               lruvec = mem_cgroup_page_lruvec(page, pgdat);
 
                if (page_evictable(page) && PageUnevictable(page)) {
                        enum lru_list lru = page_lru_base_type(page);
@@ -4309,10 +4304,10 @@ void check_move_unevictable_pages(struct pagevec *pvec)
                SetPageLRU(page);
        }
 
-       if (pgdat) {
+       if (lruvec) {
                __count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
                __count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
-               spin_unlock_irq(&pgdat->lru_lock);
+               unlock_page_lruvec_irq(lruvec);
        } else if (pgscanned) {
                count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
        }