// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"
#include "internal.h"

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(memcg_list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return lru->memcg_aware;
}

static void list_lru_register(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &memcg_list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	struct list_lru_memcg *mlrus;
	struct list_lru_node *nlru = &lru->node[nid];

	/*
	 * Either lock or RCU protects the array of per cgroup lists
	 * from relocation (see memcg_update_list_lru).
	 */
	mlrus = rcu_dereference_check(lru->mlrus, lockdep_is_held(&nlru->lock));
	if (mlrus && idx >= 0) {
		struct list_lru_per_memcg *mlru;

		mlru = rcu_dereference_check(mlrus->mlru[idx], true);
		return mlru ? &mlru->node[nid] : NULL;
	}
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru *lru, int nid, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l = &nlru->lru;
	struct mem_cgroup *memcg = NULL;

	if (!list_lru_memcg_aware(lru))
		goto out;

	memcg = mem_cgroup_from_obj(ptr);
	if (!memcg)
		goto out;

	l = list_lru_from_memcg_idx(lru, nid, memcg_cache_id(memcg));
out:
	if (memcg_ptr)
		*memcg_ptr = memcg;
	return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	return &lru->node[nid].lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru *lru, int nid, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	if (memcg_ptr)
		*memcg_ptr = NULL;
	return &lru->node[nid].lru;
}
#endif /* CONFIG_MEMCG_KMEM */

bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct mem_cgroup *memcg;
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		l = list_lru_from_kmem(lru, nid, item, &memcg);
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			set_shrinker_bit(memcg, nid,
					 lru_shrinker_id(lru));
		nlru->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (!list_empty(item)) {
		l = list_lru_from_kmem(lru, nid, item, NULL);
		list_del_init(item);
		l->nr_items--;
		nlru->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);
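
/*
 * Usage sketch (illustrative, not part of this file): a cache that makes its
 * objects reclaimable typically adds them to a list_lru when they become
 * unused and removes them again when they are referenced.  "struct my_object"
 * and "my_lru" are hypothetical names assumed only for this example.
 *
 *	struct my_object {
 *		struct list_head lru;	// list_empty() while off the LRU
 *	};
 *
 *	// object became unused: make it visible to the shrinker
 *	list_lru_add(&my_lru, &obj->lru);
 *
 *	// object is in use again: take it back off the LRU
 *	list_lru_del(&my_lru, &obj->lru);
 *
 * Both helpers return true only if they changed the list membership, so a
 * second add or del of the same item is a harmless no-op.
 */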

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_one *l;
	long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(lru, nid, memcg_cache_id(memcg));
	count = l ? READ_ONCE(l->nr_items) : 0;
	rcu_read_unlock();

	if (unlikely(count < 0))
		count = 0;

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

	l = list_lru_from_memcg_idx(lru, nid, memcg_idx);
	if (!l)
		goto out;
restart:
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			fallthrough;
		case LRU_REMOVED:
			isolated++;
			nlru->nr_items--;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}
out:
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock(&nlru->lock);
	ret = __list_lru_walk_one(lru, nid, memcg_cache_id(memcg), isolate,
				  cb_arg, nr_to_walk);
	spin_unlock(&nlru->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);
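
/*
 * Callback sketch (illustrative, not part of this file): the isolate callback
 * runs with the per-node lru lock held and tells the walker what happened to
 * the item via an lru_status code.  "my_object", "my_object_is_busy" and
 * "my_isolate" are hypothetical names assumed only for this example.
 *
 *	static enum lru_status my_isolate(struct list_head *item,
 *					  struct list_lru_one *lru,
 *					  spinlock_t *lock, void *cb_arg)
 *	{
 *		struct my_object *obj = container_of(item, struct my_object, lru);
 *		struct list_head *dispose = cb_arg;
 *
 *		if (my_object_is_busy(obj))
 *			return LRU_SKIP;	// leave it where it is
 *
 *		// drop it from the LRU and queue it for freeing by the caller
 *		list_lru_isolate(lru, item);
 *		list_add(&obj->lru, dispose);
 *		return LRU_REMOVED;
 *	}
 */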

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		      list_lru_walk_cb isolate, void *cb_arg,
		      unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock_irq(&nlru->lock);
	ret = __list_lru_walk_one(lru, nid, memcg_cache_id(memcg), isolate,
				  cb_arg, nr_to_walk);
	spin_unlock_irq(&nlru->lock);
	return ret;
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;
	int memcg_idx;

	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
				      nr_to_walk);
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx) {
			struct list_lru_node *nlru = &lru->node[nid];

			spin_lock(&nlru->lock);
			isolated += __list_lru_walk_one(lru, nid, memcg_idx,
							isolate, cb_arg,
							nr_to_walk);
			spin_unlock(&nlru->lock);

			if (*nr_to_walk <= 0)
				break;
		}
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);
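
/*
 * Shrinker sketch (illustrative, not part of this file): users normally wire
 * a list_lru into a shrinker through the list_lru_shrink_count() and
 * list_lru_shrink_walk() helpers from <linux/list_lru.h>, which take the node
 * and memcg from the shrink_control.  "my_lru", "my_isolate" and the shrinker
 * callbacks below are hypothetical names assumed only for this example.
 *
 *	static unsigned long my_count(struct shrinker *s,
 *				      struct shrink_control *sc)
 *	{
 *		return list_lru_shrink_count(&my_lru, sc);
 *	}
 *
 *	static unsigned long my_scan(struct shrinker *s,
 *				     struct shrink_control *sc)
 *	{
 *		LIST_HEAD(dispose);
 *		unsigned long freed;
 *
 *		freed = list_lru_shrink_walk(&my_lru, sc, my_isolate, &dispose);
 *		// free everything my_isolate() moved onto &dispose
 *		return freed;
 *	}
 */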

static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void memcg_destroy_list_lru_range(struct list_lru_memcg *mlrus,
					 int begin, int end)
{
	int i;

	for (i = begin; i < end; i++)
		kfree(mlrus->mlru[i]);
}

static struct list_lru_per_memcg *memcg_init_list_lru_one(gfp_t gfp)
{
	int nid;
	struct list_lru_per_memcg *mlru;

	mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp);
	if (!mlru)
		return NULL;

	for_each_node(nid)
		init_one_lru(&mlru->node[nid]);

	return mlru;
}

static void memcg_list_lru_free(struct list_lru *lru, int src_idx)
{
	struct list_lru_memcg *mlrus;
	struct list_lru_per_memcg *mlru;

	spin_lock_irq(&lru->lock);
	mlrus = rcu_dereference_protected(lru->mlrus, true);
	mlru = rcu_dereference_protected(mlrus->mlru[src_idx], true);
	rcu_assign_pointer(mlrus->mlru[src_idx], NULL);
	spin_unlock_irq(&lru->lock);

	/*
	 * A concurrent __list_lru_walk_one() may still be walking this list,
	 * so the structure must be freed with kvfree_rcu().  The walk runs
	 * under lru->node[nid]->lock, which serves as the RCU read-side
	 * critical section here.
	 */
	if (mlru)
		kvfree_rcu(mlru, rcu);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	struct list_lru_memcg *mlrus;
	int size = memcg_nr_cache_ids;

	lru->memcg_aware = memcg_aware;
	if (!memcg_aware)
		return 0;

	spin_lock_init(&lru->lock);

	mlrus = kvzalloc(struct_size(mlrus, mlru, size), GFP_KERNEL);
	if (!mlrus)
		return -ENOMEM;

	RCU_INIT_POINTER(lru->mlrus, mlrus);

	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	struct list_lru_memcg *mlrus;

	if (!list_lru_memcg_aware(lru))
		return;

	/*
	 * This is called after the shrinker has been unregistered, so nobody
	 * can be walking the lists any more and kvfree_rcu() is not needed.
	 */
	mlrus = rcu_dereference_protected(lru->mlrus, true);
	memcg_destroy_list_lru_range(mlrus, 0, memcg_nr_cache_ids);
	kvfree(mlrus);
}

static int memcg_update_list_lru(struct list_lru *lru, int old_size, int new_size)
{
	struct list_lru_memcg *old, *new;

	BUG_ON(old_size > new_size);

	old = rcu_dereference_protected(lru->mlrus,
					lockdep_is_held(&list_lrus_mutex));
	new = kvmalloc(struct_size(new, mlru, new_size), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	spin_lock_irq(&lru->lock);
	memcpy(&new->mlru, &old->mlru, flex_array_size(new, mlru, old_size));
	memset(&new->mlru[old_size], 0, flex_array_size(new, mlru, new_size - old_size));
	rcu_assign_pointer(lru->mlrus, new);
	spin_unlock_irq(&lru->lock);

	kvfree_rcu(old, rcu);
	return 0;
}

int memcg_update_all_list_lrus(int new_size)
{
	int ret = 0;
	struct list_lru *lru;
	int old_size = memcg_nr_cache_ids;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &memcg_list_lrus, list) {
		ret = memcg_update_list_lru(lru, old_size, new_size);
		if (ret)
			break;
	}
	mutex_unlock(&list_lrus_mutex);
	return ret;
}

static void memcg_reparent_list_lru_node(struct list_lru *lru, int nid,
					 int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *src, *dst;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(lru, nid, src_idx);
	if (!src)
		goto out;
	dst = list_lru_from_memcg_idx(lru, nid, dst_idx);

	list_splice_init(&src->list, &dst->list);

	if (src->nr_items) {
		dst->nr_items += src->nr_items;
		set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
		src->nr_items = 0;
	}
out:
	spin_unlock_irq(&nlru->lock);
}

static void memcg_reparent_list_lru(struct list_lru *lru,
				    int src_idx, struct mem_cgroup *dst_memcg)
{
	int i;

	for_each_node(i)
		memcg_reparent_list_lru_node(lru, i, src_idx, dst_memcg);

	memcg_list_lru_free(lru, src_idx);
}

void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
{
	struct cgroup_subsys_state *css;
	struct list_lru *lru;
	int src_idx = memcg->kmemcg_id;

	/*
	 * Change kmemcg_id of this cgroup and all its descendants to the
	 * parent's id, and then move all entries from this cgroup's list_lrus
	 * to ones of the parent.
	 *
	 * After we have finished, all list_lrus corresponding to this cgroup
	 * are guaranteed to remain empty. So we can safely free this cgroup's
	 * list lrus in memcg_list_lru_free().
	 *
	 * Changing ->kmemcg_id to the parent can prevent memcg_list_lru_alloc()
	 * from allocating list lrus for this cgroup after memcg_list_lru_free()
	 * has been called.
	 */
	rcu_read_lock();
	css_for_each_descendant_pre(css, &memcg->css) {
		struct mem_cgroup *child;

		child = mem_cgroup_from_css(css);
		child->kmemcg_id = parent->kmemcg_id;
	}
	rcu_read_unlock();

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &memcg_list_lrus, list)
		memcg_reparent_list_lru(lru, src_idx, parent);
	mutex_unlock(&list_lrus_mutex);
}

static bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
				     struct list_lru *lru)
{
	bool allocated;
	int idx;

	idx = memcg->kmemcg_id;
	if (unlikely(idx < 0))
		return true;

	rcu_read_lock();
	allocated = !!rcu_access_pointer(rcu_dereference(lru->mlrus)->mlru[idx]);
	rcu_read_unlock();

	return allocated;
}

int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
			 gfp_t gfp)
{
	int i;
	unsigned long flags;
	struct list_lru_memcg *mlrus;
	struct list_lru_memcg_table {
		struct list_lru_per_memcg *mlru;
		struct mem_cgroup *memcg;
	} *table;

	if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
		return 0;

	gfp &= GFP_RECLAIM_MASK;
	table = kmalloc_array(memcg->css.cgroup->level, sizeof(*table), gfp);
	if (!table)
		return -ENOMEM;

	/*
	 * Because the list_lru can be reparented to the parent cgroup's
	 * list_lru, we should make sure that this cgroup and all its
	 * ancestors have allocated list_lru_per_memcg.
	 */
	for (i = 0; memcg; memcg = parent_mem_cgroup(memcg), i++) {
		if (memcg_list_lru_allocated(memcg, lru))
			break;

		table[i].memcg = memcg;
		table[i].mlru = memcg_init_list_lru_one(gfp);
		if (!table[i].mlru) {
			while (i--)
				kfree(table[i].mlru);
			kfree(table);
			return -ENOMEM;
		}
	}

	spin_lock_irqsave(&lru->lock, flags);
	mlrus = rcu_dereference_protected(lru->mlrus, true);
	while (i--) {
		int index = table[i].memcg->kmemcg_id;
		struct list_lru_per_memcg *mlru = table[i].mlru;

		/*
		 * Drop the prepared entry if the cgroup was reparented or the
		 * slot has been populated in the meantime.
		 */
		if (index < 0 || rcu_dereference_protected(mlrus->mlru[index], true))
			kfree(mlru);
		else
			rcu_assign_pointer(mlrus->mlru[index], mlru);
	}
	spin_unlock_irqrestore(&lru->lock, flags);

	kfree(table);

	return 0;
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key, struct shrinker *shrinker)
{
	int i;
	int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;
#endif
	memcg_get_cache_ids();

	lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
	if (!lru->node)
		goto out;

	for_each_node(i) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	err = memcg_init_list_lru(lru, memcg_aware);
	if (err) {
		kfree(lru->node);
		/* Do this so a list_lru_destroy() doesn't crash: */
		lru->node = NULL;
		goto out;
	}

	list_lru_register(lru);
out:
	memcg_put_cache_ids();
	return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);
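
/*
 * Initialization sketch (illustrative, not part of this file): callers
 * normally use the list_lru_init*() wrappers from <linux/list_lru.h> rather
 * than calling __list_lru_init() directly.  "my_lru" and "my_shrinker" are
 * hypothetical names assumed only for this example.
 *
 *	static struct list_lru my_lru;
 *
 *	// memcg-aware LRU associated with an already set up shrinker
 *	err = list_lru_init_memcg(&my_lru, &my_shrinker);
 *	if (err)
 *		return err;
 *	...
 *	// on teardown, after the shrinker has been unregistered
 *	list_lru_destroy(&my_lru);
 */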

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	memcg_get_cache_ids();

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
	lru->shrinker_id = -1;
#endif
	memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);