mm: list_lru: rename memcg_drain_all_list_lrus to memcg_reparent_list_lrus
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"
#include "internal.h"

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(memcg_list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return lru->memcg_aware;
}

static void list_lru_register(struct list_lru *lru)
{
        if (!list_lru_memcg_aware(lru))
                return;

        mutex_lock(&list_lrus_mutex);
        list_add(&lru->list, &memcg_list_lrus);
        mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
        if (!list_lru_memcg_aware(lru))
                return;

        mutex_lock(&list_lrus_mutex);
        list_del(&lru->list);
        mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return lru->shrinker_id;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
        struct list_lru_memcg *mlrus;
        struct list_lru_node *nlru = &lru->node[nid];

        /*
         * Either lock or RCU protects the array of per cgroup lists
         * from relocation (see memcg_update_list_lru).
         */
        mlrus = rcu_dereference_check(lru->mlrus, lockdep_is_held(&nlru->lock));
        if (mlrus && idx >= 0) {
                struct list_lru_per_memcg *mlru;

                mlru = rcu_dereference_check(mlrus->mlru[idx], true);
                return mlru ? &mlru->node[nid] : NULL;
        }
        return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru *lru, int nid, void *ptr,
                   struct mem_cgroup **memcg_ptr)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l = &nlru->lru;
        struct mem_cgroup *memcg = NULL;

        if (!lru->mlrus)
                goto out;

        memcg = mem_cgroup_from_obj(ptr);
        if (!memcg)
                goto out;

        l = list_lru_from_memcg_idx(lru, nid, memcg_cache_id(memcg));
out:
        if (memcg_ptr)
                *memcg_ptr = memcg;
        return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
        return &lru->node[nid].lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru *lru, int nid, void *ptr,
                   struct mem_cgroup **memcg_ptr)
{
        if (memcg_ptr)
                *memcg_ptr = NULL;
        return &lru->node[nid].lru;
}
#endif /* CONFIG_MEMCG_KMEM */

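/*
 * Example (illustrative only; the cache type, lru and counter below are
 * hypothetical, not taken from any real caller): an object is parked on the
 * lru when its last reference is dropped and pulled back off before it is
 * reused, with the return values keeping the caller's own accounting
 * consistent:
 *
 *        // last put: make the object visible to the shrinker
 *        if (list_lru_add(&my_cache_lru, &obj->lru))
 *                my_cache_nr_cached++;
 *
 *        // reuse: take it back off the lru before touching it again
 *        if (list_lru_del(&my_cache_lru, &obj->lru))
 *                my_cache_nr_cached--;
 *
 * list_lru_add() returns false if the item was already on a list and
 * list_lru_del() returns false if it was not queued, so a caller can rely
 * on the return value to know whether the state actually changed.
 */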
bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
        int nid = page_to_nid(virt_to_page(item));
        struct list_lru_node *nlru = &lru->node[nid];
        struct mem_cgroup *memcg;
        struct list_lru_one *l;

        spin_lock(&nlru->lock);
        if (list_empty(item)) {
                l = list_lru_from_kmem(lru, nid, item, &memcg);
                list_add_tail(item, &l->list);
                /* Set shrinker bit if the first element was added */
                if (!l->nr_items++)
                        set_shrinker_bit(memcg, nid,
                                         lru_shrinker_id(lru));
                nlru->nr_items++;
                spin_unlock(&nlru->lock);
                return true;
        }
        spin_unlock(&nlru->lock);
        return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
        int nid = page_to_nid(virt_to_page(item));
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;

        spin_lock(&nlru->lock);
        if (!list_empty(item)) {
                l = list_lru_from_kmem(lru, nid, item, NULL);
                list_del_init(item);
                l->nr_items--;
                nlru->nr_items--;
                spin_unlock(&nlru->lock);
                return true;
        }
        spin_unlock(&nlru->lock);
        return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
        list_del_init(item);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
                           struct list_head *head)
{
        list_move(item, head);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
                                 int nid, struct mem_cgroup *memcg)
{
        struct list_lru_one *l;
        long count;

        rcu_read_lock();
        l = list_lru_from_memcg_idx(lru, nid, memcg_cache_id(memcg));
        count = l ? READ_ONCE(l->nr_items) : 0;
        rcu_read_unlock();

        if (unlikely(count < 0))
                count = 0;

        return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
        struct list_lru_node *nlru;

        nlru = &lru->node[nid];
        return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

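/*
 * The walk helpers below call @isolate for each item while holding the
 * per-node lru lock and interpret its enum lru_status return value (see
 * the switch in __list_lru_walk_one()). A minimal callback could look like
 * this sketch (illustrative only; demo_isolate() and can_free() are
 * hypothetical):
 *
 *        static enum lru_status demo_isolate(struct list_head *item,
 *                                            struct list_lru_one *list,
 *                                            spinlock_t *lock, void *cb_arg)
 *        {
 *                if (!can_free(item))
 *                        return LRU_ROTATE;       // try again on a later pass
 *                list_lru_isolate(list, item);    // unlink, drop list->nr_items
 *                return LRU_REMOVED;              // walker updates the node count
 *        }
 *
 * LRU_REMOVED_RETRY and LRU_RETRY additionally tell the walker that the
 * lock was dropped and the list traversal must be restarted.
 */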
static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
                    list_lru_walk_cb isolate, void *cb_arg,
                    unsigned long *nr_to_walk)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;
        struct list_head *item, *n;
        unsigned long isolated = 0;

restart:
        l = list_lru_from_memcg_idx(lru, nid, memcg_idx);
        if (!l)
                goto out;

        list_for_each_safe(item, n, &l->list) {
                enum lru_status ret;

                /*
                 * decrement nr_to_walk first so that we don't livelock if we
                 * get stuck on large numbers of LRU_RETRY items
                 */
                if (!*nr_to_walk)
                        break;
                --*nr_to_walk;

                ret = isolate(item, l, &nlru->lock, cb_arg);
                switch (ret) {
                case LRU_REMOVED_RETRY:
                        assert_spin_locked(&nlru->lock);
                        fallthrough;
                case LRU_REMOVED:
                        isolated++;
                        nlru->nr_items--;
                        /*
                         * If the lru lock has been dropped, our list
                         * traversal is now invalid and so we have to
                         * restart from scratch.
                         */
                        if (ret == LRU_REMOVED_RETRY)
                                goto restart;
                        break;
                case LRU_ROTATE:
                        list_move_tail(item, &l->list);
                        break;
                case LRU_SKIP:
                        break;
                case LRU_RETRY:
                        /*
                         * The lru lock has been dropped, our list traversal is
                         * now invalid and so we have to restart from scratch.
                         */
                        assert_spin_locked(&nlru->lock);
                        goto restart;
                default:
                        BUG();
                }
        }
out:
        return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                  list_lru_walk_cb isolate, void *cb_arg,
                  unsigned long *nr_to_walk)
{
        struct list_lru_node *nlru = &lru->node[nid];
        unsigned long ret;

        spin_lock(&nlru->lock);
        ret = __list_lru_walk_one(lru, nid, memcg_cache_id(memcg), isolate,
                                  cb_arg, nr_to_walk);
        spin_unlock(&nlru->lock);
        return ret;
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                      list_lru_walk_cb isolate, void *cb_arg,
                      unsigned long *nr_to_walk)
{
        struct list_lru_node *nlru = &lru->node[nid];
        unsigned long ret;

        spin_lock_irq(&nlru->lock);
        ret = __list_lru_walk_one(lru, nid, memcg_cache_id(memcg), isolate,
                                  cb_arg, nr_to_walk);
        spin_unlock_irq(&nlru->lock);
        return ret;
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
                                 list_lru_walk_cb isolate, void *cb_arg,
                                 unsigned long *nr_to_walk)
{
        long isolated = 0;
        int memcg_idx;

        isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
                                      nr_to_walk);
        if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
                for_each_memcg_cache_index(memcg_idx) {
                        struct list_lru_node *nlru = &lru->node[nid];

                        spin_lock(&nlru->lock);
                        isolated += __list_lru_walk_one(lru, nid, memcg_idx,
                                                        isolate, cb_arg,
                                                        nr_to_walk);
                        spin_unlock(&nlru->lock);

                        if (*nr_to_walk <= 0)
                                break;
                }
        }
        return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru_one *l)
{
        INIT_LIST_HEAD(&l->list);
        l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void memcg_destroy_list_lru_range(struct list_lru_memcg *mlrus,
                                         int begin, int end)
{
        int i;

        for (i = begin; i < end; i++)
                kfree(mlrus->mlru[i]);
}

static struct list_lru_per_memcg *memcg_init_list_lru_one(gfp_t gfp)
{
        int nid;
        struct list_lru_per_memcg *mlru;

        mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp);
        if (!mlru)
                return NULL;

        for_each_node(nid)
                init_one_lru(&mlru->node[nid]);

        return mlru;
}

static void memcg_list_lru_free(struct list_lru *lru, int src_idx)
{
        struct list_lru_memcg *mlrus;
        struct list_lru_per_memcg *mlru;

        spin_lock_irq(&lru->lock);
        mlrus = rcu_dereference_protected(lru->mlrus, true);
        mlru = rcu_dereference_protected(mlrus->mlru[src_idx], true);
        rcu_assign_pointer(mlrus->mlru[src_idx], NULL);
        spin_unlock_irq(&lru->lock);

        /*
         * __list_lru_walk_one() may still be walking this list under
         * lru->node[nid].lock, which serves as an RCU read-side critical
         * section, so the structure must be freed with kvfree_rcu().
         */
        if (mlru)
                kvfree_rcu(mlru, rcu);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
        struct list_lru_memcg *mlrus;
        int size = memcg_nr_cache_ids;

        lru->memcg_aware = memcg_aware;
        if (!memcg_aware)
                return 0;

        spin_lock_init(&lru->lock);

        mlrus = kvzalloc(struct_size(mlrus, mlru, size), GFP_KERNEL);
        if (!mlrus)
                return -ENOMEM;

        RCU_INIT_POINTER(lru->mlrus, mlrus);

        return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
        struct list_lru_memcg *mlrus;

        if (!list_lru_memcg_aware(lru))
                return;

        /*
         * This is called after the shrinker has already been unregistered,
         * so nobody can be using the lru anymore and there is no need for
         * kvfree_rcu().
         */
        mlrus = rcu_dereference_protected(lru->mlrus, true);
        memcg_destroy_list_lru_range(mlrus, 0, memcg_nr_cache_ids);
        kvfree(mlrus);
}

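/*
 * Resize the mlru[] array of one lru when the memcg cache id space grows
 * (called from memcg_update_all_list_lrus() with the old and new value of
 * memcg_nr_cache_ids). The old array is copied into a bigger one, the new
 * slots are zeroed and the pointer is switched under lru->lock; the old
 * array is freed with kvfree_rcu() because lockless readers may still be
 * dereferencing it under RCU.
 */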
static int memcg_update_list_lru(struct list_lru *lru, int old_size, int new_size)
{
        struct list_lru_memcg *old, *new;

        BUG_ON(old_size > new_size);

        old = rcu_dereference_protected(lru->mlrus,
                                        lockdep_is_held(&list_lrus_mutex));
        new = kvmalloc(struct_size(new, mlru, new_size), GFP_KERNEL);
        if (!new)
                return -ENOMEM;

        spin_lock_irq(&lru->lock);
        memcpy(&new->mlru, &old->mlru, flex_array_size(new, mlru, old_size));
        memset(&new->mlru[old_size], 0, flex_array_size(new, mlru, new_size - old_size));
        rcu_assign_pointer(lru->mlrus, new);
        spin_unlock_irq(&lru->lock);

        kvfree_rcu(old, rcu);
        return 0;
}

int memcg_update_all_list_lrus(int new_size)
{
        int ret = 0;
        struct list_lru *lru;
        int old_size = memcg_nr_cache_ids;

        mutex_lock(&list_lrus_mutex);
        list_for_each_entry(lru, &memcg_list_lrus, list) {
                ret = memcg_update_list_lru(lru, old_size, new_size);
                if (ret)
                        break;
        }
        mutex_unlock(&list_lrus_mutex);
        return ret;
}

static void memcg_reparent_list_lru_node(struct list_lru *lru, int nid,
                                         int src_idx, struct mem_cgroup *dst_memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        int dst_idx = dst_memcg->kmemcg_id;
        struct list_lru_one *src, *dst;

        /*
         * Since list_lru_{add,del} may be called under an IRQ-safe lock,
         * we have to use IRQ-safe primitives here to avoid deadlock.
         */
        spin_lock_irq(&nlru->lock);

        src = list_lru_from_memcg_idx(lru, nid, src_idx);
        if (!src)
                goto out;
        dst = list_lru_from_memcg_idx(lru, nid, dst_idx);

        list_splice_init(&src->list, &dst->list);

        if (src->nr_items) {
                dst->nr_items += src->nr_items;
                set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
                src->nr_items = 0;
        }
out:
        spin_unlock_irq(&nlru->lock);
}

static void memcg_reparent_list_lru(struct list_lru *lru,
                                    int src_idx, struct mem_cgroup *dst_memcg)
{
        int i;

        for_each_node(i)
                memcg_reparent_list_lru_node(lru, i, src_idx, dst_memcg);

        memcg_list_lru_free(lru, src_idx);
}

void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
{
        struct cgroup_subsys_state *css;
        struct list_lru *lru;
        int src_idx = memcg->kmemcg_id;

        /*
         * Change kmemcg_id of this cgroup and all its descendants to the
         * parent's id, and then move all entries from this cgroup's list_lrus
         * to ones of the parent.
         *
         * After we have finished, all list_lrus corresponding to this cgroup
         * are guaranteed to remain empty. So we can safely free this cgroup's
         * list lrus in memcg_list_lru_free().
         *
         * Changing ->kmemcg_id to the parent's id also prevents
         * memcg_list_lru_alloc() from allocating list lrus for this cgroup
         * after the memcg_list_lru_free() call.
         */
        rcu_read_lock();
        css_for_each_descendant_pre(css, &memcg->css) {
                struct mem_cgroup *child;

                child = mem_cgroup_from_css(css);
                child->kmemcg_id = parent->kmemcg_id;
        }
        rcu_read_unlock();

        mutex_lock(&list_lrus_mutex);
        list_for_each_entry(lru, &memcg_list_lrus, list)
                memcg_reparent_list_lru(lru, src_idx, parent);
        mutex_unlock(&list_lrus_mutex);
}

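/*
 * Return true if @memcg already has a list_lru_per_memcg slot populated in
 * @lru. A negative kmemcg_id is reported as allocated so that callers
 * simply skip the allocation path for such a cgroup.
 */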
static bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
                                     struct list_lru *lru)
{
        bool allocated;
        int idx;

        idx = memcg->kmemcg_id;
        if (unlikely(idx < 0))
                return true;

        rcu_read_lock();
        allocated = !!rcu_access_pointer(rcu_dereference(lru->mlrus)->mlru[idx]);
        rcu_read_unlock();

        return allocated;
}

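/*
 * Allocate the per-memcg lists for @memcg and for any ancestor that does
 * not have one yet, walking up until the first already-populated level so
 * that reparenting always finds a parent lru to splice into. Allocations
 * are done outside lru->lock and only committed under it; a slot that was
 * populated concurrently, or whose kmemcg_id is negative by commit time,
 * is simply discarded with kfree().
 */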
int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
                         gfp_t gfp)
{
        int i;
        unsigned long flags;
        struct list_lru_memcg *mlrus;
        struct list_lru_memcg_table {
                struct list_lru_per_memcg *mlru;
                struct mem_cgroup *memcg;
        } *table;

        if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
                return 0;

        gfp &= GFP_RECLAIM_MASK;
        table = kmalloc_array(memcg->css.cgroup->level, sizeof(*table), gfp);
        if (!table)
                return -ENOMEM;

        /*
         * Because the list_lru can be reparented to the parent cgroup's
         * list_lru, we should make sure that this cgroup and all its
         * ancestors have allocated list_lru_per_memcg.
         */
        for (i = 0; memcg; memcg = parent_mem_cgroup(memcg), i++) {
                if (memcg_list_lru_allocated(memcg, lru))
                        break;

                table[i].memcg = memcg;
                table[i].mlru = memcg_init_list_lru_one(gfp);
                if (!table[i].mlru) {
                        while (i--)
                                kfree(table[i].mlru);
                        kfree(table);
                        return -ENOMEM;
                }
        }

        spin_lock_irqsave(&lru->lock, flags);
        mlrus = rcu_dereference_protected(lru->mlrus, true);
        while (i--) {
                int index = table[i].memcg->kmemcg_id;
                struct list_lru_per_memcg *mlru = table[i].mlru;

                if (index < 0 || rcu_dereference_protected(mlrus->mlru[index], true))
                        kfree(mlru);
                else
                        rcu_assign_pointer(mlrus->mlru[index], mlru);
        }
        spin_unlock_irqrestore(&lru->lock, flags);

        kfree(table);

        return 0;
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
        return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

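/*
 * Users normally reach this through the wrappers in <linux/list_lru.h>
 * (list_lru_init(), list_lru_init_key(), list_lru_init_memcg()). A sketch
 * of a memcg-aware setup paired with a SHRINKER_MEMCG_AWARE shrinker might
 * look roughly like this (illustrative only; my_lru, my_shrinker and its
 * callbacks are hypothetical):
 *
 *        err = prealloc_shrinker(&my_shrinker);    // assigns shrinker->id
 *        if (!err)
 *                err = list_lru_init_memcg(&my_lru, &my_shrinker);
 *        if (!err)
 *                register_shrinker_prepared(&my_shrinker);
 *
 * Passing the shrinker here is what lets list_lru_add() flag it via
 * set_shrinker_bit() when a previously empty list gains its first item.
 */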
int __list_lru_init(struct list_lru *lru, bool memcg_aware,
                    struct lock_class_key *key, struct shrinker *shrinker)
{
        int i;
        int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
        if (shrinker)
                lru->shrinker_id = shrinker->id;
        else
                lru->shrinker_id = -1;
#endif
        memcg_get_cache_ids();

        lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
        if (!lru->node)
                goto out;

        for_each_node(i) {
                spin_lock_init(&lru->node[i].lock);
                if (key)
                        lockdep_set_class(&lru->node[i].lock, key);
                init_one_lru(&lru->node[i].lru);
        }

        err = memcg_init_list_lru(lru, memcg_aware);
        if (err) {
                kfree(lru->node);
                /* Do this so a list_lru_destroy() doesn't crash: */
                lru->node = NULL;
                goto out;
        }

        list_lru_register(lru);
out:
        memcg_put_cache_ids();
        return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
        /* Already destroyed or not yet initialized? */
        if (!lru->node)
                return;

        memcg_get_cache_ids();

        list_lru_unregister(lru);

        memcg_destroy_list_lru(lru);
        kfree(lru->node);
        lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
        lru->shrinker_id = -1;
#endif
        memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);