[linux-2.6-microblaze.git] mm/memcontrol.c
index b2405f0..e064ac0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -255,6 +255,11 @@ struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr)
 #ifdef CONFIG_MEMCG_KMEM
 extern spinlock_t css_set_lock;
 
+static int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
+                              unsigned int nr_pages);
+static void __memcg_kmem_uncharge(struct mem_cgroup *memcg,
+                                 unsigned int nr_pages);
+
 static void obj_cgroup_release(struct percpu_ref *ref)
 {
        struct obj_cgroup *objcg = container_of(ref, struct obj_cgroup, refcnt);
@@ -447,8 +452,7 @@ static void memcg_free_shrinker_maps(struct mem_cgroup *memcg)
        for_each_node(nid) {
                pn = mem_cgroup_nodeinfo(memcg, nid);
                map = rcu_dereference_protected(pn->shrinker_map, true);
-               if (map)
-                       kvfree(map);
+               kvfree(map);
                rcu_assign_pointer(pn->shrinker_map, NULL);
        }
 }
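
Note on the hunk above: kvfree(), like kfree() and vfree(), is defined to be
a no-op on a NULL pointer, so the dropped check was redundant. Minimal sketch:

        void *buf = kvmalloc(size, GFP_KERNEL); /* may return NULL */
        kvfree(buf);    /* safe either way: kvfree(NULL) does nothing */
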
@@ -1043,29 +1047,6 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 }
 EXPORT_SYMBOL(get_mem_cgroup_from_mm);
 
-/**
- * get_mem_cgroup_from_page: Obtain a reference on given page's memcg.
- * @page: page from which memcg should be extracted.
- *
- * Obtain a reference on page->memcg and returns it if successful. Otherwise
- * root_mem_cgroup is returned.
- */
-struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
-{
-       struct mem_cgroup *memcg = page_memcg(page);
-
-       if (mem_cgroup_disabled())
-               return NULL;
-
-       rcu_read_lock();
-       /* Page should not get uncharged and freed memcg under us. */
-       if (!memcg || WARN_ON_ONCE(!css_tryget(&memcg->css)))
-               memcg = root_mem_cgroup;
-       rcu_read_unlock();
-       return memcg;
-}
-EXPORT_SYMBOL(get_mem_cgroup_from_page);
-
 static __always_inline struct mem_cgroup *active_memcg(void)
 {
        if (in_interrupt())
@@ -1080,13 +1061,9 @@ static __always_inline struct mem_cgroup *get_active_memcg(void)
 
        rcu_read_lock();
        memcg = active_memcg();
-       if (memcg) {
-               /* current->active_memcg must hold a ref. */
-               if (WARN_ON_ONCE(!css_tryget(&memcg->css)))
-                       memcg = root_mem_cgroup;
-               else
-                       memcg = current->active_memcg;
-       }
+       /* remote memcg must hold a ref. */
+       if (memcg && WARN_ON_ONCE(!css_tryget(&memcg->css)))
+               memcg = root_mem_cgroup;
        rcu_read_unlock();
 
        return memcg;
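
For context, active_memcg() (whose opening lines are visible above) reads
roughly as follows in this kernel version. Because it can return the per-CPU
int_active_memcg in interrupt context, the old branch that overwrote the
result with current->active_memcg handed back the wrong memcg there:

        static __always_inline struct mem_cgroup *active_memcg(void)
        {
                if (in_interrupt())
                        return this_cpu_read(int_active_memcg);
                else
                        return current->active_memcg;
        }
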
@@ -1346,20 +1323,19 @@ void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
  * lock_page_lruvec - lock and return lruvec for a given page.
  * @page: the page
  *
- * This series functions should be used in either conditions:
- * PageLRU is cleared or unset
- * or page->_refcount is zero
- * or page is locked.
+ * These functions are safe to use under any of the following conditions:
+ * - page locked
+ * - PageLRU cleared
+ * - lock_page_memcg()
+ * - page->_refcount is zero
  */
 struct lruvec *lock_page_lruvec(struct page *page)
 {
        struct lruvec *lruvec;
        struct pglist_data *pgdat = page_pgdat(page);
 
-       rcu_read_lock();
        lruvec = mem_cgroup_page_lruvec(page, pgdat);
        spin_lock(&lruvec->lru_lock);
-       rcu_read_unlock();
 
        lruvec_memcg_debug(lruvec, page);
 
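
With the RCU section gone, correctness rests entirely on the caller holding
one of the guarantees listed in the comment above, since only those keep
page_memcg() stable across the lookup. A hypothetical usage sketch with an
isolated page (PageLRU already cleared):

        struct lruvec *lruvec;

        lruvec = lock_page_lruvec_irq(page);
        /* ... move or rotate the page on its LRU list ... */
        unlock_page_lruvec_irq(lruvec);
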
@@ -1371,10 +1347,8 @@ struct lruvec *lock_page_lruvec_irq(struct page *page)
        struct lruvec *lruvec;
        struct pglist_data *pgdat = page_pgdat(page);
 
-       rcu_read_lock();
        lruvec = mem_cgroup_page_lruvec(page, pgdat);
        spin_lock_irq(&lruvec->lru_lock);
-       rcu_read_unlock();
 
        lruvec_memcg_debug(lruvec, page);
 
@@ -1386,10 +1360,8 @@ struct lruvec *lock_page_lruvec_irqsave(struct page *page, unsigned long *flags)
        struct lruvec *lruvec;
        struct pglist_data *pgdat = page_pgdat(page);
 
-       rcu_read_lock();
        lruvec = mem_cgroup_page_lruvec(page, pgdat);
        spin_lock_irqsave(&lruvec->lru_lock, *flags);
-       rcu_read_unlock();
 
        lruvec_memcg_debug(lruvec, page);
 
@@ -1512,71 +1484,73 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
 
 struct memory_stat {
        const char *name;
-       unsigned int ratio;
        unsigned int idx;
 };
 
-static struct memory_stat memory_stats[] = {
-       { "anon", PAGE_SIZE, NR_ANON_MAPPED },
-       { "file", PAGE_SIZE, NR_FILE_PAGES },
-       { "kernel_stack", 1024, NR_KERNEL_STACK_KB },
-       { "pagetables", PAGE_SIZE, NR_PAGETABLE },
-       { "percpu", 1, MEMCG_PERCPU_B },
-       { "sock", PAGE_SIZE, MEMCG_SOCK },
-       { "shmem", PAGE_SIZE, NR_SHMEM },
-       { "file_mapped", PAGE_SIZE, NR_FILE_MAPPED },
-       { "file_dirty", PAGE_SIZE, NR_FILE_DIRTY },
-       { "file_writeback", PAGE_SIZE, NR_WRITEBACK },
+static const struct memory_stat memory_stats[] = {
+       { "anon",                       NR_ANON_MAPPED                  },
+       { "file",                       NR_FILE_PAGES                   },
+       { "kernel_stack",               NR_KERNEL_STACK_KB              },
+       { "pagetables",                 NR_PAGETABLE                    },
+       { "percpu",                     MEMCG_PERCPU_B                  },
+       { "sock",                       MEMCG_SOCK                      },
+       { "shmem",                      NR_SHMEM                        },
+       { "file_mapped",                NR_FILE_MAPPED                  },
+       { "file_dirty",                 NR_FILE_DIRTY                   },
+       { "file_writeback",             NR_WRITEBACK                    },
+#ifdef CONFIG_SWAP
+       { "swapcached",                 NR_SWAPCACHE                    },
+#endif
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       /*
-        * The ratio will be initialized in memory_stats_init(). Because
-        * on some architectures, the macro of HPAGE_PMD_SIZE is not
-        * constant(e.g. powerpc).
-        */
-       { "anon_thp", PAGE_SIZE, NR_ANON_THPS },
-       { "file_thp", 0, NR_FILE_THPS },
-       { "shmem_thp", 0, NR_SHMEM_THPS },
+       { "anon_thp",                   NR_ANON_THPS                    },
+       { "file_thp",                   NR_FILE_THPS                    },
+       { "shmem_thp",                  NR_SHMEM_THPS                   },
 #endif
-       { "inactive_anon", PAGE_SIZE, NR_INACTIVE_ANON },
-       { "active_anon", PAGE_SIZE, NR_ACTIVE_ANON },
-       { "inactive_file", PAGE_SIZE, NR_INACTIVE_FILE },
-       { "active_file", PAGE_SIZE, NR_ACTIVE_FILE },
-       { "unevictable", PAGE_SIZE, NR_UNEVICTABLE },
-
-       /*
-        * Note: The slab_reclaimable and slab_unreclaimable must be
-        * together and slab_reclaimable must be in front.
-        */
-       { "slab_reclaimable", 1, NR_SLAB_RECLAIMABLE_B },
-       { "slab_unreclaimable", 1, NR_SLAB_UNRECLAIMABLE_B },
+       { "inactive_anon",              NR_INACTIVE_ANON                },
+       { "active_anon",                NR_ACTIVE_ANON                  },
+       { "inactive_file",              NR_INACTIVE_FILE                },
+       { "active_file",                NR_ACTIVE_FILE                  },
+       { "unevictable",                NR_UNEVICTABLE                  },
+       { "slab_reclaimable",           NR_SLAB_RECLAIMABLE_B           },
+       { "slab_unreclaimable",         NR_SLAB_UNRECLAIMABLE_B         },
 
        /* The memory events */
-       { "workingset_refault_anon", 1, WORKINGSET_REFAULT_ANON },
-       { "workingset_refault_file", 1, WORKINGSET_REFAULT_FILE },
-       { "workingset_activate_anon", 1, WORKINGSET_ACTIVATE_ANON },
-       { "workingset_activate_file", 1, WORKINGSET_ACTIVATE_FILE },
-       { "workingset_restore_anon", 1, WORKINGSET_RESTORE_ANON },
-       { "workingset_restore_file", 1, WORKINGSET_RESTORE_FILE },
-       { "workingset_nodereclaim", 1, WORKINGSET_NODERECLAIM },
+       { "workingset_refault_anon",    WORKINGSET_REFAULT_ANON         },
+       { "workingset_refault_file",    WORKINGSET_REFAULT_FILE         },
+       { "workingset_activate_anon",   WORKINGSET_ACTIVATE_ANON        },
+       { "workingset_activate_file",   WORKINGSET_ACTIVATE_FILE        },
+       { "workingset_restore_anon",    WORKINGSET_RESTORE_ANON         },
+       { "workingset_restore_file",    WORKINGSET_RESTORE_FILE         },
+       { "workingset_nodereclaim",     WORKINGSET_NODERECLAIM          },
 };
 
-static int __init memory_stats_init(void)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(memory_stats); i++) {
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-               if (memory_stats[i].idx == NR_FILE_THPS ||
-                   memory_stats[i].idx == NR_SHMEM_THPS)
-                       memory_stats[i].ratio = HPAGE_PMD_SIZE;
-#endif
-               VM_BUG_ON(!memory_stats[i].ratio);
-               VM_BUG_ON(memory_stats[i].idx >= MEMCG_NR_STAT);
+/* Translate stat items to the correct unit for memory.stat output */
+static int memcg_page_state_unit(int item)
+{
+       switch (item) {
+       case MEMCG_PERCPU_B:
+       case NR_SLAB_RECLAIMABLE_B:
+       case NR_SLAB_UNRECLAIMABLE_B:
+       case WORKINGSET_REFAULT_ANON:
+       case WORKINGSET_REFAULT_FILE:
+       case WORKINGSET_ACTIVATE_ANON:
+       case WORKINGSET_ACTIVATE_FILE:
+       case WORKINGSET_RESTORE_ANON:
+       case WORKINGSET_RESTORE_FILE:
+       case WORKINGSET_NODERECLAIM:
+               return 1;
+       case NR_KERNEL_STACK_KB:
+               return SZ_1K;
+       default:
+               return PAGE_SIZE;
        }
+}
 
-       return 0;
+static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg,
+                                                   int item)
+{
+       return memcg_page_state(memcg, item) * memcg_page_state_unit(item);
 }
-pure_initcall(memory_stats_init);
 
 static char *memory_stat_format(struct mem_cgroup *memcg)
 {
@@ -1601,13 +1575,12 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
        for (i = 0; i < ARRAY_SIZE(memory_stats); i++) {
                u64 size;
 
-               size = memcg_page_state(memcg, memory_stats[i].idx);
-               size *= memory_stats[i].ratio;
+               size = memcg_page_state_output(memcg, memory_stats[i].idx);
                seq_buf_printf(&s, "%s %llu\n", memory_stats[i].name, size);
 
                if (unlikely(memory_stats[i].idx == NR_SLAB_UNRECLAIMABLE_B)) {
-                       size = memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) +
-                              memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE_B);
+                       size += memcg_page_state_output(memcg,
+                                                       NR_SLAB_RECLAIMABLE_B);
                        seq_buf_printf(&s, "slab %llu\n", size);
                }
        }
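
The per-entry ratio field is gone; memcg_page_state_unit() derives the
multiplier from the item itself, so kernel stacks (tracked in KB), byte
counters (slab, percpu, workingset events) and page counters need no per-row
bookkeeping. Illustrative arithmetic with the items from the table above:

        /* Hypothetical: raw counters scaled to the bytes shown in memory.stat. */
        u64 kstack = memcg_page_state(memcg, NR_KERNEL_STACK_KB) * SZ_1K;  /* KB -> bytes */
        u64 slab   = memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B);       /* already bytes */
        u64 anon   = memcg_page_state(memcg, NR_ANON_MAPPED) * PAGE_SIZE;  /* pages -> bytes */

This also removes the initcall that patched HPAGE_PMD_SIZE in at boot on
architectures where it is not a compile-time constant; the THP counters,
having been converted to base-page accounting elsewhere in this series, now
simply fall through to the PAGE_SIZE default.
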
@@ -3091,8 +3064,8 @@ static void memcg_free_cache_id(int id)
  *
  * Returns 0 on success, an error code on failure.
  */
-int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
-                       unsigned int nr_pages)
+static int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
+                              unsigned int nr_pages)
 {
        struct page_counter *counter;
        int ret;
@@ -3124,7 +3097,7 @@ int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
  * @memcg: memcg to uncharge
  * @nr_pages: number of pages to uncharge
  */
-void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages)
+static void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
        if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
                page_counter_uncharge(&memcg->kmem, nr_pages);
@@ -3314,24 +3287,21 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
 
 #endif /* CONFIG_MEMCG_KMEM */
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /*
- * Because page_memcg(head) is not set on compound tails, set it now.
+ * Because page_memcg(head) is not set on tails, set it now.
  */
-void mem_cgroup_split_huge_fixup(struct page *head)
+void split_page_memcg(struct page *head, unsigned int nr)
 {
        struct mem_cgroup *memcg = page_memcg(head);
        int i;
 
-       if (mem_cgroup_disabled())
+       if (mem_cgroup_disabled() || !memcg)
                return;
 
-       for (i = 1; i < HPAGE_PMD_NR; i++) {
-               css_get(&memcg->css);
-               head[i].memcg_data = (unsigned long)memcg;
-       }
+       for (i = 1; i < nr; i++)
+               head[i].memcg_data = head->memcg_data;
+       css_get_many(&memcg->css, nr - 1);
 }
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
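
split_page_memcg() now takes the subpage count and is no longer THP-only; a
hedged sketch of the expected call sites (they live outside this diff):

        /* THP split: propagate memcg to all tails before they become
         * individually visible. */
        split_page_memcg(head, HPAGE_PMD_NR);

        /* split_page() on a non-compound higher-order page: */
        split_page_memcg(page, 1 << order);

Copying head->memcg_data wholesale, rather than casting the memcg pointer
back to an unsigned long, also preserves any flag bits stored alongside it.
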
 #ifdef CONFIG_MEMCG_SWAP
 /**
@@ -6398,6 +6368,12 @@ static int memory_stat_show(struct seq_file *m, void *v)
 }
 
 #ifdef CONFIG_NUMA
+static inline unsigned long lruvec_page_state_output(struct lruvec *lruvec,
+                                                    int item)
+{
+       return lruvec_page_state(lruvec, item) * memcg_page_state_unit(item);
+}
+
 static int memory_numa_stat_show(struct seq_file *m, void *v)
 {
        int i;
@@ -6415,8 +6391,8 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
                        struct lruvec *lruvec;
 
                        lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
-                       size = lruvec_page_state(lruvec, memory_stats[i].idx);
-                       size *= memory_stats[i].ratio;
+                       size = lruvec_page_state_output(lruvec,
+                                                       memory_stats[i].idx);
                        seq_printf(m, " N%d=%llu", nid, size);
                }
                seq_putc(m, '\n');
@@ -6765,7 +6741,19 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
        memcg_check_events(memcg, page);
        local_irq_enable();
 
-       if (PageSwapCache(page)) {
+       /*
+        * Cgroup1's unified memory+swap counter has been charged with the
+        * new swapcache page, finish the transfer by uncharging the swap
+        * slot. The swap slot would also get uncharged when it dies, but
+        * it can stick around indefinitely and we'd count the page twice
+        * the entire time.
+        *
+        * Cgroup2 has separate resource counters for memory and swap,
+        * so this is a non-issue here. Memory and swap charge lifetimes
+        * correspond 1:1 to page and swap slot lifetimes: we charge the
+        * page to memory here, and uncharge swap when the slot is freed.
+        */
+       if (do_memsw_account() && PageSwapCache(page)) {
                swp_entry_t entry = { .val = page_private(page) };
                /*
                 * The swap entry might not get freed for a long time,
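
do_memsw_account() is what distinguishes the two cases described in the new
comment; paraphrased from memcontrol.c of this era (the exact form varies by
version):

        static inline bool do_memsw_account(void)
        {
                /* Only cgroup1 (the non-default hierarchy) uses the combined
                 * memory+swap counter, and only with swap accounting enabled. */
                return !cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
                        !cgroup_memory_noswap;
        }
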
@@ -6856,31 +6844,6 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
        css_put(&ug->memcg->css);
 }
 
-static void uncharge_list(struct list_head *page_list)
-{
-       struct uncharge_gather ug;
-       struct list_head *next;
-
-       uncharge_gather_clear(&ug);
-
-       /*
-        * Note that the list can be a single page->lru; hence the
-        * do-while loop instead of a simple list_for_each_entry().
-        */
-       next = page_list->next;
-       do {
-               struct page *page;
-
-               page = list_entry(next, struct page, lru);
-               next = page->lru.next;
-
-               uncharge_page(page, &ug);
-       } while (next != page_list);
-
-       if (ug.memcg)
-               uncharge_batch(&ug);
-}
-
 /**
  * mem_cgroup_uncharge - uncharge a page
  * @page: page to uncharge
@@ -6912,11 +6875,17 @@ void mem_cgroup_uncharge(struct page *page)
  */
 void mem_cgroup_uncharge_list(struct list_head *page_list)
 {
+       struct uncharge_gather ug;
+       struct page *page;
+
        if (mem_cgroup_disabled())
                return;
 
-       if (!list_empty(page_list))
-               uncharge_list(page_list);
+       uncharge_gather_clear(&ug);
+       list_for_each_entry(page, page_list, lru)
+               uncharge_page(page, &ug);
+       if (ug.memcg)
+               uncharge_batch(&ug);
 }
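
As the comment deleted from uncharge_list() explains, the do-while existed
because the list head could once be a page->lru itself; the lists passed in
today are always headed by a standalone list_head, so list_for_each_entry()
is sufficient. Hypothetical caller sketch:

        /* Batch-free fully isolated pages, reclaim-style. */
        LIST_HEAD(pages_to_free);

        /* ... isolate pages and chain them on pages_to_free via page->lru ... */
        mem_cgroup_uncharge_list(&pages_to_free);
        free_unref_page_list(&pages_to_free);   /* hand back to the page allocator */
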
 
 /**