* Lockless page tracking & accounting
* Unified hierarchy configuration model
* Copyright (C) 2015 Red Hat, Inc., Johannes Weiner
+ *
+ * Per memcg lru locking
+ * Copyright (C) 2020 Alibaba, Inc, Alex Shi
*/
#include <linux/page_counter.h>
return ret;
}
+#ifdef CONFIG_DEBUG_VM
+void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+{
+ struct mem_cgroup *memcg;
+
+ if (mem_cgroup_disabled())
+ return;
+
+ memcg = page_memcg(page);
+
+ if (!memcg)
+ VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != root_mem_cgroup, page);
+ else
+ VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != memcg, page);
+}
+#endif
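
Callers never need their own #ifdef around lruvec_memcg_debug(): when CONFIG_DEBUG_VM is off, the header side pairs the declaration with an empty inline stub that compiles away. A sketch of that pair (exact placement in include/linux/memcontrol.h assumed):

#ifdef CONFIG_DEBUG_VM
void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page);
#else
static inline void lruvec_memcg_debug(struct lruvec *lruvec,
				      struct page *page)
{
}
#endif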
+
/**
- * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
+ * lock_page_lruvec - lock and return lruvec for a given page.
* @page: the page
- * @pgdat: pgdat of the page
*
- * This function relies on page's memcg being stable - see the
- * access rules in commit_charge().
+ * These functions are safe to use under any of the following conditions:
+ * - PageLRU is cleared
+ * - page->_refcount is zero
+ * - the page is locked
*/
-struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgdat)
+struct lruvec *lock_page_lruvec(struct page *page)
{
- struct mem_cgroup_per_node *mz;
- struct mem_cgroup *memcg;
struct lruvec *lruvec;
+ struct pglist_data *pgdat = page_pgdat(page);
- if (mem_cgroup_disabled()) {
- lruvec = &pgdat->__lruvec;
- goto out;
- }
+ rcu_read_lock();
+ lruvec = mem_cgroup_page_lruvec(page, pgdat);
+ spin_lock(&lruvec->lru_lock);
+ rcu_read_unlock();
- memcg = page_memcg(page);
- /*
- * Swapcache readahead pages are added to the LRU - and
- * possibly migrated - before they are charged.
- */
- if (!memcg)
- memcg = root_mem_cgroup;
+ lruvec_memcg_debug(lruvec, page);
+
+ return lruvec;
+}
+
+struct lruvec *lock_page_lruvec_irq(struct page *page)
+{
+ struct lruvec *lruvec;
+ struct pglist_data *pgdat = page_pgdat(page);
+
+ rcu_read_lock();
+ lruvec = mem_cgroup_page_lruvec(page, pgdat);
+ spin_lock_irq(&lruvec->lru_lock);
+ rcu_read_unlock();
+
+ lruvec_memcg_debug(lruvec, page);
+
+ return lruvec;
+}
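
To make the locking conditions above concrete, here is a hypothetical caller, loosely modeled on isolate_lru_page(); TestClearPageLRU(), del_page_from_lru_list() and unlock_page_lruvec_irq() are assumed from the surrounding kernel and the header side of this series:

/*
 * Sketch, not part of this diff: isolate @page from its LRU list.
 * Clearing PageLRU first satisfies one of the conditions above, which
 * is what keeps page->memcg stable so the lruvec we lock cannot be
 * the wrong one.
 */
static bool isolate_page_sketch(struct page *page)
{
	struct lruvec *lruvec;

	/* fails if someone else already isolated the page */
	if (!TestClearPageLRU(page))
		return false;

	get_page(page);
	lruvec = lock_page_lruvec_irq(page);
	del_page_from_lru_list(page, lruvec, page_lru(page));
	unlock_page_lruvec_irq(lruvec);

	return true;
}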
+
+struct lruvec *lock_page_lruvec_irqsave(struct page *page, unsigned long *flags)
+{
+ struct lruvec *lruvec;
+ struct pglist_data *pgdat = page_pgdat(page);
+
+ rcu_read_lock();
+ lruvec = mem_cgroup_page_lruvec(page, pgdat);
+ spin_lock_irqsave(&lruvec->lru_lock, *flags);
+ rcu_read_unlock();
+
+ lruvec_memcg_debug(lruvec, page);
+
- mz = mem_cgroup_page_nodeinfo(memcg, page);
- lruvec = &mz->lruvec;
-out:
- /*
- * Since a node can be onlined after the mem_cgroup was created,
- * we have to be prepared to initialize lruvec->zone here;
- * and if offlined then reonlined, we need to reinitialize it.
- */
- if (unlikely(lruvec->pgdat != pgdat))
- lruvec->pgdat = pgdat;
return lruvec;
}
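
The irqsave variant is what batched paths use: hold one lru_lock across consecutive pages and only cycle it when the lruvec changes. A sketch, assuming relock_page_lruvec_irqsave() and unlock_page_lruvec_irqrestore() from the header side of this series:

static void process_pagevec_sketch(struct pagevec *pvec)
{
	struct lruvec *lruvec = NULL;
	unsigned long flags;
	int i;

	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];

		/* drops and re-takes the lock only on a lruvec change */
		lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);

		/* ... modify the page's LRU state under lruvec->lru_lock ... */
	}
	if (lruvec)
		unlock_page_lruvec_irqrestore(lruvec, flags);
}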
if (unlikely(!memcg))
return NULL;
+#ifdef CONFIG_PROVE_LOCKING
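+ /*
+ * Annotate a would-be acquisition of move_lock so lockdep records
+ * the dependency even when the fast path below returns without
+ * taking it; irqs are disabled to match the context in which
+ * move_lock is really acquired.
+ */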
+ local_irq_save(flags);
+ might_lock(&memcg->move_lock);
+ local_irq_restore(flags);
+#endif
+
if (atomic_read(&memcg->moving_account) <= 0)
return memcg;
#endif /* CONFIG_MEMCG_KMEM */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-
/*
- * Because tail pages are not marked as "used", set it. We're under
- * pgdat->lru_lock and migration entries setup in all page mappings.
+ * Because page_memcg(head) is not set on compound tails, set it now.
*/
void mem_cgroup_split_huge_fixup(struct page *head)
{
return;
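
For context, the body elided above boils down to copying the head page's memcg reference onto each tail; roughly (a sketch of the surrounding kernel code at this point in the series, not part of the diff):

	struct mem_cgroup *memcg = page_memcg(head);
	int i;

	if (mem_cgroup_disabled())
		return;

	for (i = 1; i < HPAGE_PMD_NR; i++) {
		css_get(&memcg->css);	/* one css reference per tail */
		head[i].memcg_data = (unsigned long)memcg;
	}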
memcg = page_memcg(oldpage);
+ VM_WARN_ON_ONCE_PAGE(!memcg, oldpage);
if (!memcg)
return;
VM_BUG_ON_PAGE(PageLRU(page), page);
VM_BUG_ON_PAGE(page_count(page), page);
+ if (mem_cgroup_disabled())
+ return;
+
if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
return;
memcg = page_memcg(page);
- /* Readahead page, never charged */
+ VM_WARN_ON_ONCE_PAGE(!memcg, page);
if (!memcg)
return;
struct mem_cgroup *memcg;
unsigned short oldid;
+ if (mem_cgroup_disabled())
+ return 0;
+
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
return 0;
memcg = page_memcg(page);
- /* Readahead page, never charged */
+ VM_WARN_ON_ONCE_PAGE(!memcg, page);
if (!memcg)
return 0;