#ifdef CONFIG_TRANSPARENT_HUGEPAGE
THP_FAULT_ALLOC,
THP_COLLAPSE_ALLOC,
+ THP_SWPOUT,
+ THP_SWPOUT_FALLBACK,
#endif
};
return x;
}
+static int memcg_page_state_unit(int item);
+
+/*
+ * Normalize the value passed into memcg_rstat_updated() to be in pages. Round
+ * up non-zero sub-page updates to 1 page as zero page updates are ignored.
+ */
+static int memcg_state_val_in_pages(int idx, int val)
+{
+ int unit = memcg_page_state_unit(idx);
+
+ if (!val || unit == PAGE_SIZE)
+ return val;
+ else
+ return max(val * unit / PAGE_SIZE, 1UL);
+}
+
/**
* __mod_memcg_state - update cgroup memory statistics
* @memcg: the memory cgroup
return;
__this_cpu_add(memcg->vmstats_percpu->state[idx], val);
- memcg_rstat_updated(memcg, val);
+ memcg_rstat_updated(memcg, memcg_state_val_in_pages(idx, val));
}
/* idx can be of type enum memcg_stat_item or node_stat_item. */
/* Update lruvec */
__this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
- memcg_rstat_updated(memcg, val);
+ memcg_rstat_updated(memcg, memcg_state_val_in_pages(idx, val));
memcg_stats_unlock();
}
return false;
}
+/**
+ * get_mem_cgroup_from_current - Obtain a reference on current task's memcg.
+ */
+struct mem_cgroup *get_mem_cgroup_from_current(void)
+{
+ struct mem_cgroup *memcg;
+
+ if (mem_cgroup_disabled())
+ return NULL;
+
+again:
+ rcu_read_lock();
+ memcg = mem_cgroup_from_task(current);
+ if (!css_tryget(&memcg->css)) {
+ rcu_read_unlock();
+ goto again;
+ }
+ rcu_read_unlock();
+ return memcg;
+}
+
/**
* mem_cgroup_iter - iterate over memory cgroup hierarchy
* @root: hierarchy root
{ "workingset_nodereclaim", WORKINGSET_NODERECLAIM },
};
-/* Translate stat items to the correct unit for memory.stat output */
+/* The actual unit of the state item, not the same as the output unit */
static int memcg_page_state_unit(int item)
{
switch (item) {
case MEMCG_ZSWAP_B:
case NR_SLAB_RECLAIMABLE_B:
case NR_SLAB_UNRECLAIMABLE_B:
+ return 1;
+ case NR_KERNEL_STACK_KB:
+ return SZ_1K;
+ default:
+ return PAGE_SIZE;
+ }
+}
+
+/* Translate stat items to the correct unit for memory.stat output */
+static int memcg_page_state_output_unit(int item)
+{
+ /*
+ * Workingset state is actually in pages, but we export it to userspace
+ * as a scalar count of events, so special case it here.
+ */
+ switch (item) {
case WORKINGSET_REFAULT_ANON:
case WORKINGSET_REFAULT_FILE:
case WORKINGSET_ACTIVATE_ANON:
case WORKINGSET_RESTORE_FILE:
case WORKINGSET_NODERECLAIM:
return 1;
- case NR_KERNEL_STACK_KB:
- return SZ_1K;
default:
- return PAGE_SIZE;
+ return memcg_page_state_unit(item);
}
}
static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg,
int item)
{
- return memcg_page_state(memcg, item) * memcg_page_state_unit(item);
+ return memcg_page_state(memcg, item) *
+ memcg_page_state_output_unit(item);
+}
+
+static inline unsigned long memcg_page_state_local_output(
+ struct mem_cgroup *memcg, int item)
+{
+ return memcg_page_state_local(memcg, item) *
+ memcg_page_state_output_unit(item);
}
static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
* Scheduled by try_charge() to be executed from the userland return path
* and reclaims memory over the high limit.
*/
-void mem_cgroup_handle_over_high(void)
+void mem_cgroup_handle_over_high(gfp_t gfp_mask)
{
unsigned long penalty_jiffies;
unsigned long pflags;
*/
nr_reclaimed = reclaim_high(memcg,
in_retry ? SWAP_CLUSTER_MAX : nr_pages,
- GFP_KERNEL);
+ gfp_mask);
/*
* memory.high is breached and reclaim is unable to keep up. Throttle
if (current->memcg_nr_pages_over_high > MEMCG_CHARGE_BATCH &&
!(current->flags & PF_MEMALLOC) &&
gfpflags_allow_blocking(gfp_mask)) {
- mem_cgroup_handle_over_high();
+ mem_cgroup_handle_over_high(gfp_mask);
}
return 0;
}
return try_charge_memcg(memcg, gfp_mask, nr_pages);
}
-static inline void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
+/**
+ * mem_cgroup_cancel_charge() - cancel an uncommitted try_charge() call.
+ * @memcg: memcg previously charged.
+ * @nr_pages: number of pages previously charged.
+ */
+void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
{
if (mem_cgroup_is_root(memcg))
return;
folio->memcg_data = (unsigned long)memcg;
}
+/**
+ * mem_cgroup_commit_charge - commit a previously successful try_charge().
+ * @folio: folio to commit the charge to.
+ * @memcg: memcg previously charged.
+ */
+void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
+{
+ css_get(&memcg->css);
+ commit_charge(folio, memcg);
+
+ local_irq_disable();
+ mem_cgroup_charge_statistics(memcg, folio_nr_pages(folio));
+ memcg_check_events(memcg, folio_nid(folio));
+ local_irq_enable();
+}
+
#ifdef CONFIG_MEMCG_KMEM
/*
* The allocated objcg pointers array is not accounted directly.
case _MEMSWAP:
ret = mem_cgroup_resize_max(memcg, nr_pages, true);
break;
+ case _KMEM:
+ pr_warn_once("kmem.limit_in_bytes is deprecated and will be removed. "
+ "Writing any value to this file has no effect. "
+ "Please report your usecase to linux-mm@kvack.org if you "
+ "depend on this functionality.\n");
+ ret = 0;
+ break;
case _TCP:
ret = memcg_update_tcp_max(memcg, nr_pages);
break;
NR_WRITEBACK,
WORKINGSET_REFAULT_ANON,
WORKINGSET_REFAULT_FILE,
+#ifdef CONFIG_SWAP
MEMCG_SWAP,
+ NR_SWAPCACHE,
+#endif
};
static const char *const memcg1_stat_names[] = {
"writeback",
"workingset_refault_anon",
"workingset_refault_file",
+#ifdef CONFIG_SWAP
"swap",
+ "swapcached",
+#endif
};
/* Universal VM events cgroup1 shows, original sort order */
for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
unsigned long nr;
- if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
- continue;
- nr = memcg_page_state_local(memcg, memcg1_stats[i]);
- seq_buf_printf(s, "%s %lu\n", memcg1_stat_names[i],
- nr * memcg_page_state_unit(memcg1_stats[i]));
+ nr = memcg_page_state_local_output(memcg, memcg1_stats[i]);
+ seq_buf_printf(s, "%s %lu\n", memcg1_stat_names[i], nr);
}
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
}
seq_buf_printf(s, "hierarchical_memory_limit %llu\n",
(u64)memory * PAGE_SIZE);
- if (do_memsw_account())
- seq_buf_printf(s, "hierarchical_memsw_limit %llu\n",
- (u64)memsw * PAGE_SIZE);
+ seq_buf_printf(s, "hierarchical_memsw_limit %llu\n",
+ (u64)memsw * PAGE_SIZE);
for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
unsigned long nr;
- if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
- continue;
- nr = memcg_page_state(memcg, memcg1_stats[i]);
+ nr = memcg_page_state_output(memcg, memcg1_stats[i]);
seq_buf_printf(s, "total_%s %llu\n", memcg1_stat_names[i],
- (u64)nr * memcg_page_state_unit(memcg1_stats[i]));
+ (u64)nr);
}
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
.seq_show = memcg_numa_stat_show,
},
#endif
+ {
+ .name = "kmem.limit_in_bytes",
+ .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT),
+ .write = mem_cgroup_write,
+ .read_u64 = mem_cgroup_read_u64,
+ },
{
.name = "kmem.usage_in_bytes",
.private = MEMFILE_PRIVATE(_KMEM, RES_USAGE),
/* we must uncharge all the leftover precharges from mc.to */
if (mc.precharge) {
- cancel_charge(mc.to, mc.precharge);
+ mem_cgroup_cancel_charge(mc.to, mc.precharge);
mc.precharge = 0;
}
/*
* we must uncharge here.
*/
if (mc.moved_charge) {
- cancel_charge(mc.from, mc.moved_charge);
+ mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
mc.moved_charge = 0;
}
/* we must fixup refcnts and charges */
static inline unsigned long lruvec_page_state_output(struct lruvec *lruvec,
int item)
{
- return lruvec_page_state(lruvec, item) * memcg_page_state_unit(item);
+ return lruvec_page_state(lruvec, item) *
+ memcg_page_state_output_unit(item);
}
static int memory_numa_stat_show(struct seq_file *m, void *v)
static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
gfp_t gfp)
{
- long nr_pages = folio_nr_pages(folio);
int ret;
- ret = try_charge(memcg, gfp, nr_pages);
+ ret = try_charge(memcg, gfp, folio_nr_pages(folio));
if (ret)
goto out;
- css_get(&memcg->css);
- commit_charge(folio, memcg);
-
- local_irq_disable();
- mem_cgroup_charge_statistics(memcg, nr_pages);
- memcg_check_events(memcg, folio_nid(folio));
- local_irq_enable();
+ mem_cgroup_commit_charge(folio, memcg);
out:
return ret;
}
return ret;
}
+/**
+ * mem_cgroup_hugetlb_try_charge - try to charge the memcg for a hugetlb folio
+ * @memcg: memcg to charge.
+ * @gfp: reclaim mode.
+ * @nr_pages: number of pages to charge.
+ *
+ * This function is called when allocating a huge page folio to determine if
+ * the memcg has the capacity for it. It does not commit the charge yet,
+ * as the hugetlb folio itself has not been obtained from the hugetlb pool.
+ *
+ * Once we have obtained the hugetlb folio, we can call
+ * mem_cgroup_commit_charge() to commit the charge. If we fail to obtain the
+ * folio, we should instead call mem_cgroup_cancel_charge() to undo the effect
+ * of try_charge().
+ *
+ * Returns 0 on success. Otherwise, an error code is returned.
+ */
+int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp,
+ long nr_pages)
+{
+ /*
+ * If hugetlb memcg charging is not enabled, do not fail hugetlb allocation,
+ * but do not attempt to commit charge later (or cancel on error) either.
+ */
+ if (mem_cgroup_disabled() || !memcg ||
+ !cgroup_subsys_on_dfl(memory_cgrp_subsys) ||
+ !(cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING))
+ return -EOPNOTSUPP;
+
+ if (try_charge(memcg, gfp, nr_pages))
+ return -ENOMEM;
+
+ return 0;
+}
+
/**
* mem_cgroup_swapin_charge_folio - Charge a newly allocated folio for swapin.
* @folio: folio to charge.
}
/**
- * mem_cgroup_migrate - Charge a folio's replacement.
+ * mem_cgroup_replace_folio - Charge a folio's replacement.
* @old: Currently circulating folio.
* @new: Replacement folio.
*
* Charge @new as a replacement folio for @old. @old will
- * be uncharged upon free.
+ * be uncharged upon free. This is only used by the page cache
+ * (in replace_page_cache_folio()).
*
* Both folios must be locked, @new->mapping must be set up.
*/
-void mem_cgroup_migrate(struct folio *old, struct folio *new)
+void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
{
struct mem_cgroup *memcg;
long nr_pages = folio_nr_pages(new);
local_irq_restore(flags);
}
+/**
+ * mem_cgroup_migrate - Transfer the memcg data from the old to the new folio.
+ * @old: Currently circulating folio.
+ * @new: Replacement folio.
+ *
+ * Transfer the memcg data from the old folio to the new folio for migration.
+ * The old folio's data info will be cleared. Note that the memory counters
+ * will remain unchanged throughout the process.
+ *
+ * Both folios must be locked, @new->mapping must be set up.
+ */
+void mem_cgroup_migrate(struct folio *old, struct folio *new)
+{
+ struct mem_cgroup *memcg;
+
+ VM_BUG_ON_FOLIO(!folio_test_locked(old), old);
+ VM_BUG_ON_FOLIO(!folio_test_locked(new), new);
+ VM_BUG_ON_FOLIO(folio_test_anon(old) != folio_test_anon(new), new);
+ VM_BUG_ON_FOLIO(folio_nr_pages(old) != folio_nr_pages(new), new);
+
+ if (mem_cgroup_disabled())
+ return;
+
+ memcg = folio_memcg(old);
+ /*
+ * Note that it is normal to see !memcg for a hugetlb folio.
+ * For e.g, itt could have been allocated when memory_hugetlb_accounting
+ * was not selected.
+ */
+ VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(old) && !memcg, old);
+ if (!memcg)
+ return;
+
+ /* Transfer the charge and the css ref */
+ commit_charge(new, memcg);
+ old->memcg_data = 0;
+}
+
DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
EXPORT_SYMBOL(memcg_sockets_enabled_key);