/* Active memory cgroup to use from an interrupt context */
DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
+EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
/* Socket memory accounting disabled? */
static bool cgroup_memory_nosocket;
/* Kernel memory accounting disabled? */
-static bool cgroup_memory_nokmem;
+bool cgroup_memory_nokmem;
/* Whether the swap controller is active */
#ifdef CONFIG_MEMCG_SWAP
static void obj_cgroup_release(struct percpu_ref *ref)
{
struct obj_cgroup *objcg = container_of(ref, struct obj_cgroup, refcnt);
- struct mem_cgroup *memcg;
unsigned int nr_bytes;
unsigned int nr_pages;
unsigned long flags;
WARN_ON_ONCE(nr_bytes & (PAGE_SIZE - 1));
nr_pages = nr_bytes >> PAGE_SHIFT;
- spin_lock_irqsave(&css_set_lock, flags);
- memcg = obj_cgroup_memcg(objcg);
if (nr_pages)
obj_cgroup_uncharge_pages(objcg, nr_pages);
+
+ spin_lock_irqsave(&css_set_lock, flags);
list_del(&objcg->list);
- mem_cgroup_put(memcg);
spin_unlock_irqrestore(&css_set_lock, flags);
percpu_ref_exit(ref);
spin_lock_irq(&css_set_lock);
- /* Move active objcg to the parent's list */
- xchg(&objcg->memcg, parent);
- css_get(&parent->css);
- list_add(&objcg->list, &parent->objcg_list);
-
- /* Move already reparented objcgs to the parent's list */
- list_for_each_entry(iter, &memcg->objcg_list, list) {
- css_get(&parent->css);
- xchg(&iter->memcg, parent);
- css_put(&memcg->css);
- }
+ /* 1) Ready to reparent active objcg. */
+ list_add(&objcg->list, &memcg->objcg_list);
+ /* 2) Reparent active objcg and already reparented objcgs to parent. */
+ list_for_each_entry(iter, &memcg->objcg_list, list)
+ WRITE_ONCE(iter->memcg, parent);
+ /* 3) Move already reparented objcgs to the parent's list */
list_splice(&memcg->objcg_list, &parent->objcg_list);
spin_unlock_irq(&css_set_lock);
}
EXPORT_SYMBOL(mem_cgroup_from_task);
+static __always_inline struct mem_cgroup *active_memcg(void)
+{
+ if (in_interrupt())
+ return this_cpu_read(int_active_memcg);
+ else
+ return current->active_memcg;
+}
+
/**
* get_mem_cgroup_from_mm: Obtain a reference on given mm_struct's memcg.
* @mm: mm from which memcg should be extracted. It can be NULL.
*
- * Obtain a reference on mm->memcg and returns it if successful. Otherwise
- * root_mem_cgroup is returned. However if mem_cgroup is disabled, NULL is
- * returned.
+ * Obtain a reference on mm->memcg and returns it if successful. If mm
+ * is NULL, then the memcg is chosen as follows:
+ * 1) The active memcg, if set.
+ * 2) current->mm->memcg, if available
+ * 3) root memcg
+ * If mem_cgroup is disabled, NULL is returned.
*/
struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
{
if (mem_cgroup_disabled())
return NULL;
+ /*
+ * Page cache insertions can happen without an
+ * actual mm context, e.g. during disk probing
+ * on boot, loopback IO, acct() writes etc.
+ *
+ * No need to css_get on root memcg as the reference
+ * counting is disabled on the root level in the
+ * cgroup core. See CSS_NO_REF.
+ */
+ if (unlikely(!mm)) {
+ memcg = active_memcg();
+ if (unlikely(memcg)) {
+ /* remote memcg must hold a ref */
+ css_get(&memcg->css);
+ return memcg;
+ }
+ mm = current->mm;
+ if (unlikely(!mm))
+ return root_mem_cgroup;
+ }
+
rcu_read_lock();
do {
- /*
- * Page cache insertions can happen without an
- * actual mm context, e.g. during disk probing
- * on boot, loopback IO, acct() writes etc.
- */
- if (unlikely(!mm))
+ memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
+ if (unlikely(!memcg))
memcg = root_mem_cgroup;
- else {
- memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
- if (unlikely(!memcg))
- memcg = root_mem_cgroup;
- }
} while (!css_tryget(&memcg->css));
rcu_read_unlock();
return memcg;
}
EXPORT_SYMBOL(get_mem_cgroup_from_mm);
-static __always_inline struct mem_cgroup *active_memcg(void)
-{
- if (in_interrupt())
- return this_cpu_read(int_active_memcg);
- else
- return current->active_memcg;
-}
-
static __always_inline bool memcg_kmem_bypass(void)
{
/* Allow remote memcg charging from any context. */
struct lruvec *lock_page_lruvec(struct page *page)
{
struct lruvec *lruvec;
- struct pglist_data *pgdat = page_pgdat(page);
- lruvec = mem_cgroup_page_lruvec(page, pgdat);
+ lruvec = mem_cgroup_page_lruvec(page);
spin_lock(&lruvec->lru_lock);
lruvec_memcg_debug(lruvec, page);
struct lruvec *lock_page_lruvec_irq(struct page *page)
{
struct lruvec *lruvec;
- struct pglist_data *pgdat = page_pgdat(page);
- lruvec = mem_cgroup_page_lruvec(page, pgdat);
+ lruvec = mem_cgroup_page_lruvec(page);
spin_lock_irq(&lruvec->lru_lock);
lruvec_memcg_debug(lruvec, page);
struct lruvec *lock_page_lruvec_irqsave(struct page *page, unsigned long *flags)
{
struct lruvec *lruvec;
- struct pglist_data *pgdat = page_pgdat(page);
- lruvec = mem_cgroup_page_lruvec(page, pgdat);
+ lruvec = mem_cgroup_page_lruvec(page);
spin_lock_irqsave(&lruvec->lru_lock, *flags);
lruvec_memcg_debug(lruvec, page);
css_put(&memcg->css);
}
-static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
- unsigned int nr_pages)
+static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
+ unsigned int nr_pages)
{
unsigned int batch = max(MEMCG_CHARGE_BATCH, nr_pages);
int nr_retries = MAX_RECLAIM_RETRIES;
bool drained = false;
unsigned long pflags;
- if (mem_cgroup_is_root(memcg))
- return 0;
retry:
if (consume_stock(memcg, nr_pages))
return 0;
return 0;
}
+static inline int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
+ unsigned int nr_pages)
+{
+ if (mem_cgroup_is_root(memcg))
+ return 0;
+
+ return try_charge_memcg(memcg, gfp_mask, nr_pages);
+}
+
#if defined(CONFIG_MEMCG_KMEM) || defined(CONFIG_MMU)
static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
{
}
#ifdef CONFIG_MEMCG_KMEM
+/*
+ * The allocated objcg pointers array is not accounted directly.
+ * Moreover, it should not come from DMA buffer and is not readily
+ * reclaimable. So those GFP bits should be masked off.
+ */
+#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT)
+
int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
gfp_t gfp, bool new_page)
{
unsigned long memcg_data;
void *vec;
+ gfp &= ~OBJCGS_CLEAR_MASK;
vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp,
page_to_nid(page));
if (!vec)
memcg = get_mem_cgroup_from_objcg(objcg);
- ret = try_charge(memcg, gfp, nr_pages);
+ ret = try_charge_memcg(memcg, gfp, nr_pages);
if (ret)
goto out;
* @gfp_mask: reclaim mode
*
* Try to charge @page to the memcg that @mm belongs to, reclaiming
- * pages according to @gfp_mask if necessary.
+ * pages according to @gfp_mask if necessary. if @mm is NULL, try to
+ * charge to the active memcg.
*
* Do not use this for pages allocated for swapin.
*
/* Force-charge the new page. The old one will be freed soon */
nr_pages = thp_nr_pages(newpage);
- page_counter_charge(&memcg->memory, nr_pages);
- if (do_memsw_account())
- page_counter_charge(&memcg->memsw, nr_pages);
+ if (!mem_cgroup_is_root(memcg)) {
+ page_counter_charge(&memcg->memory, nr_pages);
+ if (do_memsw_account())
+ page_counter_charge(&memcg->memsw, nr_pages);
+ }
css_get(&memcg->css);
commit_charge(newpage, memcg);