#include <linux/cpuset.h>
#include <linux/compaction.h>
#include <linux/notifier.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/khugepaged.h>
#include <linux/rculist_nulls.h>
#include <linux/random.h>
-#include <linux/srcu.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
int vm_swappiness = 60;
LIST_HEAD(shrinker_list);
-DEFINE_MUTEX(shrinker_mutex);
-DEFINE_SRCU(shrinker_srcu);
-static atomic_t shrinker_srcu_generation = ATOMIC_INIT(0);
+DECLARE_RWSEM(shrinker_rwsem);
#ifdef CONFIG_MEMCG
static int shrinker_nr_max;
static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
int nid)
{
- return srcu_dereference_check(memcg->nodeinfo[nid]->shrinker_info,
- &shrinker_srcu,
- lockdep_is_held(&shrinker_mutex));
-}
-
-static struct shrinker_info *shrinker_info_srcu(struct mem_cgroup *memcg,
- int nid)
-{
- return srcu_dereference(memcg->nodeinfo[nid]->shrinker_info,
- &shrinker_srcu);
-}
-
-static void free_shrinker_info_rcu(struct rcu_head *head)
-{
- kvfree(container_of(head, struct shrinker_info, rcu));
+ return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info,
+ lockdep_is_held(&shrinker_rwsem));
}
static int expand_one_shrinker_info(struct mem_cgroup *memcg,
defer_size - old_defer_size);
rcu_assign_pointer(pn->shrinker_info, new);
- call_srcu(&shrinker_srcu, &old->rcu, free_shrinker_info_rcu);
+ kvfree_rcu(old, rcu);
}
return 0;
int nid, size, ret = 0;
int map_size, defer_size = 0;
- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
map_size = shrinker_map_size(shrinker_nr_max);
defer_size = shrinker_defer_size(shrinker_nr_max);
size = map_size + defer_size;
info->map_nr_max = shrinker_nr_max;
rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
}
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
return ret;
}
if (!root_mem_cgroup)
goto out;
- lockdep_assert_held(&shrinker_mutex);
+ lockdep_assert_held(&shrinker_rwsem);
map_size = shrinker_map_size(new_nr_max);
defer_size = shrinker_defer_size(new_nr_max);
{
if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
struct shrinker_info *info;
- int srcu_idx;
- srcu_idx = srcu_read_lock(&shrinker_srcu);
- info = shrinker_info_srcu(memcg, nid);
+ rcu_read_lock();
+ info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) {
/* Pairs with smp mb in shrink_slab() */
smp_mb__before_atomic();
set_bit(shrinker_id, info->map);
}
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
+ rcu_read_unlock();
}
}
if (mem_cgroup_disabled())
return -ENOSYS;
- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
+ /* This may call shrinker, so it must use down_read_trylock() */
id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
if (id < 0)
goto unlock;
shrinker->id = id;
ret = 0;
unlock:
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
return ret;
}
BUG_ON(id < 0);
- lockdep_assert_held(&shrinker_mutex);
+ lockdep_assert_held(&shrinker_rwsem);
idr_remove(&shrinker_idr, id);
}
{
struct shrinker_info *info;
- info = shrinker_info_srcu(memcg, nid);
+ info = shrinker_info_protected(memcg, nid);
return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0);
}
{
struct shrinker_info *info;
- info = shrinker_info_srcu(memcg, nid);
+ info = shrinker_info_protected(memcg, nid);
return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]);
}
parent = root_mem_cgroup;
/* Prevent from concurrent shrinker_info expand */
- mutex_lock(&shrinker_mutex);
+ down_read(&shrinker_rwsem);
for_each_node(nid) {
child_info = shrinker_info_protected(memcg, nid);
parent_info = shrinker_info_protected(parent, nid);
atomic_long_add(nr, &parent_info->nr_deferred[i]);
}
}
- mutex_unlock(&shrinker_mutex);
+ up_read(&shrinker_rwsem);
}
+/* Returns true for reclaim through cgroup limits or cgroup interfaces. */
static bool cgroup_reclaim(struct scan_control *sc)
{
return sc->target_mem_cgroup;
}
-static bool global_reclaim(struct scan_control *sc)
+/*
+ * Returns true for reclaim on the root cgroup. This is true for direct
+ * allocator reclaim and reclaim through cgroup interfaces on the root cgroup.
+ */
+static bool root_reclaim(struct scan_control *sc)
{
return !sc->target_mem_cgroup || mem_cgroup_is_root(sc->target_mem_cgroup);
}
return false;
}
-static bool global_reclaim(struct scan_control *sc)
+static bool root_reclaim(struct scan_control *sc)
{
return true;
}
* memcg reclaim, to make reporting more accurate and reduce
* underestimation, but it's probably not worth the complexity for now.
*/
- if (current->reclaim_state && global_reclaim(sc)) {
+ if (current->reclaim_state && root_reclaim(sc)) {
sc->nr_reclaimed += current->reclaim_state->reclaimed;
current->reclaim_state->reclaimed = 0;
}
shrinker->name = NULL;
#endif
if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
unregister_memcg_shrinker(shrinker);
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
return;
}
void register_shrinker_prepared(struct shrinker *shrinker)
{
- mutex_lock(&shrinker_mutex);
- list_add_tail_rcu(&shrinker->list, &shrinker_list);
+ down_write(&shrinker_rwsem);
+ list_add_tail(&shrinker->list, &shrinker_list);
shrinker->flags |= SHRINKER_REGISTERED;
shrinker_debugfs_add(shrinker);
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
}
static int __register_shrinker(struct shrinker *shrinker)
if (!(shrinker->flags & SHRINKER_REGISTERED))
return;
- mutex_lock(&shrinker_mutex);
- list_del_rcu(&shrinker->list);
+ down_write(&shrinker_rwsem);
+ list_del(&shrinker->list);
shrinker->flags &= ~SHRINKER_REGISTERED;
if (shrinker->flags & SHRINKER_MEMCG_AWARE)
unregister_memcg_shrinker(shrinker);
debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id);
- mutex_unlock(&shrinker_mutex);
-
- atomic_inc(&shrinker_srcu_generation);
- synchronize_srcu(&shrinker_srcu);
+ up_write(&shrinker_rwsem);
shrinker_debugfs_remove(debugfs_entry, debugfs_id);
/**
* synchronize_shrinkers - Wait for all running shrinkers to complete.
*
- * This is useful to guarantee that all shrinker invocations have seen an
- * update, before freeing memory.
+ * This is equivalent to calling unregister_shrink() and register_shrinker(),
+ * but atomically and with less overhead. This is useful to guarantee that all
+ * shrinker invocations have seen an update, before freeing memory, similar to
+ * rcu.
*/
void synchronize_shrinkers(void)
{
- atomic_inc(&shrinker_srcu_generation);
- synchronize_srcu(&shrinker_srcu);
+ down_write(&shrinker_rwsem);
+ up_write(&shrinker_rwsem);
}
EXPORT_SYMBOL(synchronize_shrinkers);
{
struct shrinker_info *info;
unsigned long ret, freed = 0;
- int srcu_idx, generation;
- int i = 0;
+ int i;
if (!mem_cgroup_online(memcg))
return 0;
-again:
- srcu_idx = srcu_read_lock(&shrinker_srcu);
- info = shrinker_info_srcu(memcg, nid);
+ if (!down_read_trylock(&shrinker_rwsem))
+ return 0;
+
+ info = shrinker_info_protected(memcg, nid);
if (unlikely(!info))
goto unlock;
- generation = atomic_read(&shrinker_srcu_generation);
- for_each_set_bit_from(i, info->map, info->map_nr_max) {
+ for_each_set_bit(i, info->map, info->map_nr_max) {
struct shrink_control sc = {
.gfp_mask = gfp_mask,
.nid = nid,
set_shrinker_bit(memcg, nid, i);
}
freed += ret;
- if (atomic_read(&shrinker_srcu_generation) != generation) {
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
- i++;
- goto again;
+
+ if (rwsem_is_contended(&shrinker_rwsem)) {
+ freed = freed ? : 1;
+ break;
}
}
unlock:
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
+ up_read(&shrinker_rwsem);
return freed;
}
#else /* CONFIG_MEMCG */
{
unsigned long ret, freed = 0;
struct shrinker *shrinker;
- int srcu_idx, generation;
/*
* The root memcg might be allocated even though memcg is disabled
if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
- srcu_idx = srcu_read_lock(&shrinker_srcu);
+ if (!down_read_trylock(&shrinker_rwsem))
+ goto out;
- generation = atomic_read(&shrinker_srcu_generation);
- list_for_each_entry_srcu(shrinker, &shrinker_list, list,
- srcu_read_lock_held(&shrinker_srcu)) {
+ list_for_each_entry(shrinker, &shrinker_list, list) {
struct shrink_control sc = {
.gfp_mask = gfp_mask,
.nid = nid,
if (ret == SHRINK_EMPTY)
ret = 0;
freed += ret;
-
- if (atomic_read(&shrinker_srcu_generation) != generation) {
+ /*
+ * Bail out if someone want to register a new shrinker to
+ * prevent the registration from being stalled for long periods
+ * by parallel ongoing shrinking.
+ */
+ if (rwsem_is_contended(&shrinker_rwsem)) {
freed = freed ? : 1;
break;
}
}
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
+ up_read(&shrinker_rwsem);
+out:
cond_resched();
return freed;
}
mapping->a_ops->is_dirty_writeback(folio, dirty, writeback);
}
-static struct page *alloc_demote_page(struct page *page, unsigned long private)
+static struct folio *alloc_demote_folio(struct folio *src,
+ unsigned long private)
{
- struct page *target_page;
+ struct folio *dst;
nodemask_t *allowed_mask;
struct migration_target_control *mtc;
*/
mtc->nmask = NULL;
mtc->gfp_mask |= __GFP_THISNODE;
- target_page = alloc_migration_target(page, (unsigned long)mtc);
- if (target_page)
- return target_page;
+ dst = alloc_migration_target(src, (unsigned long)mtc);
+ if (dst)
+ return dst;
mtc->gfp_mask &= ~__GFP_THISNODE;
mtc->nmask = allowed_mask;
- return alloc_migration_target(page, (unsigned long)mtc);
+ return alloc_migration_target(src, (unsigned long)mtc);
}
/*
node_get_allowed_targets(pgdat, &allowed_mask);
/* Demotion ignores all cpuset and mempolicy settings */
- migrate_pages(demote_folios, alloc_demote_page, NULL,
+ migrate_pages(demote_folios, alloc_demote_folio, NULL,
(unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
&nr_succeeded);
}
+#ifdef CONFIG_CMA
+/*
+ * It is waste of effort to scan and reclaim CMA pages if it is not available
+ * for current allocation context. Kswapd can not be enrolled as it can not
+ * distinguish this scenario by using sc->gfp_mask = GFP_KERNEL
+ */
+static bool skip_cma(struct folio *folio, struct scan_control *sc)
+{
+ return !current_is_kswapd() &&
+ gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
+ get_pageblock_migratetype(&folio->page) == MIGRATE_CMA;
+}
+#else
+static bool skip_cma(struct folio *folio, struct scan_control *sc)
+{
+ return false;
+}
+#endif
+
/*
* Isolating page from the lruvec to fill in @dst list by nr_to_scan times.
*
nr_pages = folio_nr_pages(folio);
total_scan += nr_pages;
- if (folio_zonenum(folio) > sc->reclaim_idx) {
+ if (folio_zonenum(folio) > sc->reclaim_idx ||
+ skip_cma(folio, sc)) {
nr_skipped[folio_zonenum(folio)] += nr_pages;
move_to = &folios_skipped;
goto move;
* won't get blocked by normal direct-reclaimers, forming a circular
* deadlock.
*/
- if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
+ if (gfp_has_io_fs(sc->gfp_mask))
inactive >>= 3;
too_many = isolated > inactive;
#define get_cap(cap) static_branch_unlikely(&lru_gen_caps[cap])
#endif
+static bool should_walk_mmu(void)
+{
+ return arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK);
+}
+
+static bool should_clear_pmd_young(void)
+{
+ return arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG);
+}
+
/******************************************************************************
* shorthand helpers
******************************************************************************/
struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
int old_gen, new_gen = lru_gen_from_seq(walk->max_seq);
- VM_WARN_ON_ONCE(pmd_leaf(*pmd));
-
- ptl = pte_lockptr(args->mm, pmd);
- if (!spin_trylock(ptl))
+ pte = pte_offset_map_nolock(args->mm, pmd, start & PMD_MASK, &ptl);
+ if (!pte)
+ return false;
+ if (!spin_trylock(ptl)) {
+ pte_unmap(pte);
return false;
+ }
arch_enter_lazy_mmu_mode();
-
- pte = pte_offset_map(pmd, start & PMD_MASK);
restart:
for (i = pte_index(start), addr = start; addr != end; i++, addr += PAGE_SIZE) {
unsigned long pfn;
struct folio *folio;
+ pte_t ptent = ptep_get(pte + i);
total++;
walk->mm_stats[MM_LEAF_TOTAL]++;
- pfn = get_pte_pfn(pte[i], args->vma, addr);
+ pfn = get_pte_pfn(ptent, args->vma, addr);
if (pfn == -1)
continue;
- if (!pte_young(pte[i])) {
+ if (!pte_young(ptent)) {
walk->mm_stats[MM_LEAF_OLD]++;
continue;
}
young++;
walk->mm_stats[MM_LEAF_YOUNG]++;
- if (pte_dirty(pte[i]) && !folio_test_dirty(folio) &&
+ if (pte_dirty(ptent) && !folio_test_dirty(folio) &&
!(folio_test_anon(folio) && folio_test_swapbacked(folio) &&
!folio_test_swapcache(folio)))
folio_mark_dirty(folio);
if (i < PTRS_PER_PTE && get_next_vma(PMD_MASK, PAGE_SIZE, args, &start, &end))
goto restart;
- pte_unmap(pte);
-
arch_leave_lazy_mmu_mode();
- spin_unlock(ptl);
+ pte_unmap_unlock(pte, ptl);
return suitable_to_scan(total, young);
}
goto next;
if (!pmd_trans_huge(pmd[i])) {
- if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG))
+ if (should_clear_pmd_young())
pmdp_test_and_clear_young(vma, addr, pmd + i);
goto next;
}
unsigned long next;
unsigned long addr;
struct vm_area_struct *vma;
- unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)];
+ DECLARE_BITMAP(bitmap, MIN_LRU_BATCH);
unsigned long first = -1;
struct lru_gen_mm_walk *walk = args->private;
#endif
walk->mm_stats[MM_NONLEAF_TOTAL]++;
- if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) {
+ if (should_clear_pmd_young()) {
if (!pmd_young(val))
continue;
* handful of PTEs. Spreading the work out over a period of time usually
* is less efficient, but it avoids bursty page faults.
*/
- if (!arch_has_hw_pte_young() || !get_cap(LRU_GEN_MM_WALK)) {
+ if (!should_walk_mmu()) {
success = iterate_mm_list_nowalk(lruvec, max_seq);
goto done;
}
for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) {
unsigned long pfn;
+ pte_t ptent = ptep_get(pte + i);
- pfn = get_pte_pfn(pte[i], pvmw->vma, addr);
+ pfn = get_pte_pfn(ptent, pvmw->vma, addr);
if (pfn == -1)
continue;
- if (!pte_young(pte[i]))
+ if (!pte_young(ptent))
continue;
folio = get_pfn_folio(pfn, memcg, pgdat, !walk || walk->can_swap);
young++;
- if (pte_dirty(pte[i]) && !folio_test_dirty(folio) &&
+ if (pte_dirty(ptent) && !folio_test_dirty(folio) &&
!(folio_test_anon(folio) && folio_test_swapbacked(folio) &&
!folio_test_swapcache(folio)))
folio_mark_dirty(folio);
{
int seg;
int old, new;
+ unsigned long flags;
int bin = get_random_u32_below(MEMCG_NR_BINS);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
- spin_lock(&pgdat->memcg_lru.lock);
+ spin_lock_irqsave(&pgdat->memcg_lru.lock, flags);
VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
if (!pgdat->memcg_lru.nr_memcgs[old] && old == get_memcg_gen(pgdat->memcg_lru.seq))
WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
- spin_unlock(&pgdat->memcg_lru.lock);
+ spin_unlock_irqrestore(&pgdat->memcg_lru.lock, flags);
}
void lru_gen_online_memcg(struct mem_cgroup *memcg)
struct pglist_data *pgdat = NODE_DATA(nid);
struct lruvec *lruvec = get_lruvec(memcg, nid);
- spin_lock(&pgdat->memcg_lru.lock);
+ spin_lock_irq(&pgdat->memcg_lru.lock);
VM_WARN_ON_ONCE(!hlist_nulls_unhashed(&lruvec->lrugen.list));
lruvec->lrugen.gen = gen;
- spin_unlock(&pgdat->memcg_lru.lock);
+ spin_unlock_irq(&pgdat->memcg_lru.lock);
}
}
struct pglist_data *pgdat = NODE_DATA(nid);
struct lruvec *lruvec = get_lruvec(memcg, nid);
- spin_lock(&pgdat->memcg_lru.lock);
+ spin_lock_irq(&pgdat->memcg_lru.lock);
VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))
WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
- spin_unlock(&pgdat->memcg_lru.lock);
+ spin_unlock_irq(&pgdat->memcg_lru.lock);
}
}
-void lru_gen_soft_reclaim(struct lruvec *lruvec)
+void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid)
{
+ struct lruvec *lruvec = get_lruvec(memcg, nid);
+
/* see the comment on MEMCG_NR_GENS */
if (lru_gen_memcg_seg(lruvec) != MEMCG_LRU_HEAD)
lru_gen_rotate_memcg(lruvec, MEMCG_LRU_HEAD);
WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
lrugen->protected[hist][type][tier - 1] + delta);
- __mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type, delta);
return true;
}
static unsigned long get_nr_to_reclaim(struct scan_control *sc)
{
/* don't abort memcg reclaim to ensure fairness */
- if (!global_reclaim(sc))
+ if (!root_reclaim(sc))
return -1;
return max(sc->nr_to_reclaim, compact_gap(sc->order));
{
struct blk_plug plug;
- VM_WARN_ON_ONCE(global_reclaim(sc));
+ VM_WARN_ON_ONCE(root_reclaim(sc));
VM_WARN_ON_ONCE(!sc->may_writepage || !sc->may_unmap);
lru_add_drain();
struct blk_plug plug;
unsigned long reclaimed = sc->nr_reclaimed;
- VM_WARN_ON_ONCE(!global_reclaim(sc));
+ VM_WARN_ON_ONCE(!root_reclaim(sc));
/*
* Unmapped clean folios are already prioritized. Scanning for more of
if (get_cap(LRU_GEN_CORE))
caps |= BIT(LRU_GEN_CORE);
- if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))
+ if (should_walk_mmu())
caps |= BIT(LRU_GEN_MM_WALK);
- if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG))
+ if (should_clear_pmd_young())
caps |= BIT(LRU_GEN_NONLEAF_YOUNG);
return sysfs_emit(buf, "0x%04x\n", caps);
bool proportional_reclaim;
struct blk_plug plug;
- if (lru_gen_enabled() && !global_reclaim(sc)) {
+ if (lru_gen_enabled() && !root_reclaim(sc)) {
lru_gen_shrink_lruvec(lruvec, sc);
return;
}
if (!managed_zone(zone))
continue;
- switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) {
- case COMPACT_SUCCESS:
- case COMPACT_CONTINUE:
+ /* Allocation can already succeed, nothing to do */
+ if (zone_watermark_ok(zone, sc->order, min_wmark_pages(zone),
+ sc->reclaim_idx, 0))
+ return false;
+
+ if (compaction_suitable(zone, sc->order, sc->reclaim_idx))
return false;
- default:
- /* check next zone */
- ;
- }
}
/*
struct lruvec *target_lruvec;
bool reclaimable = false;
- if (lru_gen_enabled() && global_reclaim(sc)) {
+ if (lru_gen_enabled() && root_reclaim(sc)) {
lru_gen_shrink_node(pgdat, sc);
return;
}
* Legacy memcg will stall in page writeback so avoid forcibly
* stalling in reclaim_throttle().
*/
- if ((current_is_kswapd() ||
- (cgroup_reclaim(sc) && writeback_throttling_sane(sc))) &&
- sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
- set_bit(LRUVEC_CONGESTED, &target_lruvec->flags);
+ if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested) {
+ if (cgroup_reclaim(sc) && writeback_throttling_sane(sc))
+ set_bit(LRUVEC_CGROUP_CONGESTED, &target_lruvec->flags);
+
+ if (current_is_kswapd())
+ set_bit(LRUVEC_NODE_CONGESTED, &target_lruvec->flags);
+ }
/*
* Stall direct reclaim for IO completions if the lruvec is
*/
if (!current_is_kswapd() && current_may_throttle() &&
!sc->hibernation_mode &&
- test_bit(LRUVEC_CONGESTED, &target_lruvec->flags))
+ (test_bit(LRUVEC_CGROUP_CONGESTED, &target_lruvec->flags) ||
+ test_bit(LRUVEC_NODE_CONGESTED, &target_lruvec->flags)))
reclaim_throttle(pgdat, VMSCAN_THROTTLE_CONGESTED);
if (should_continue_reclaim(pgdat, nr_node_reclaimed, sc))
static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
{
unsigned long watermark;
- enum compact_result suitable;
- suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx);
- if (suitable == COMPACT_SUCCESS)
- /* Allocation should succeed already. Don't reclaim. */
+ /* Allocation can already succeed, nothing to do */
+ if (zone_watermark_ok(zone, sc->order, min_wmark_pages(zone),
+ sc->reclaim_idx, 0))
return true;
- if (suitable == COMPACT_SKIPPED)
- /* Compaction cannot yet proceed. Do reclaim. */
+
+ /* Compaction cannot yet proceed. Do reclaim. */
+ if (!compaction_suitable(zone, sc->order, sc->reclaim_idx))
return false;
/*
lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup,
zone->zone_pgdat);
- clear_bit(LRUVEC_CONGESTED, &lruvec->flags);
+ clear_bit(LRUVEC_CGROUP_CONGESTED, &lruvec->flags);
}
}
continue;
pfmemalloc_reserve += min_wmark_pages(zone);
- free_pages += zone_page_state(zone, NR_FREE_PAGES);
+ free_pages += zone_page_state_snapshot(zone, NR_FREE_PAGES);
}
/* If there are no reserves (unexpected config) then do not throttle */
{
struct lruvec *lruvec = mem_cgroup_lruvec(NULL, pgdat);
- clear_bit(LRUVEC_CONGESTED, &lruvec->flags);
+ clear_bit(LRUVEC_NODE_CONGESTED, &lruvec->flags);
+ clear_bit(LRUVEC_CGROUP_CONGESTED, &lruvec->flags);
clear_bit(PGDAT_DIRTY, &pgdat->flags);
clear_bit(PGDAT_WRITEBACK, &pgdat->flags);
}
/*
* This kswapd start function will be called by init and node-hot-add.
*/
-void kswapd_run(int nid)
+void __meminit kswapd_run(int nid)
{
pg_data_t *pgdat = NODE_DATA(nid);
* Called by memory hotplug when all memory in a node is offlined. Caller must
* be holding mem_hotplug_begin/done().
*/
-void kswapd_stop(int nid)
+void __meminit kswapd_stop(int nid)
{
pg_data_t *pgdat = NODE_DATA(nid);
struct task_struct *kswapd;
}
#endif
-void check_move_unevictable_pages(struct pagevec *pvec)
-{
- struct folio_batch fbatch;
- unsigned i;
-
- folio_batch_init(&fbatch);
- for (i = 0; i < pvec->nr; i++) {
- struct page *page = pvec->pages[i];
-
- if (PageTransTail(page))
- continue;
- folio_batch_add(&fbatch, page_folio(page));
- }
- check_move_unevictable_folios(&fbatch);
-}
-EXPORT_SYMBOL_GPL(check_move_unevictable_pages);
-
/**
* check_move_unevictable_folios - Move evictable folios to appropriate zone
* lru list