mm/page_counter.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>
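
/*
 * Propagate the protected part of a counter's usage to its parent: a
 * counter can only protect memory it actually uses, so the amount
 * credited to the parent's children_min_usage/children_low_usage is
 * min(usage, protection).  Only the delta against the previously
 * propagated value is applied, which keeps the update lockless.
 */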
static void propagate_protected_usage(struct page_counter *c,
				      unsigned long usage)
{
	unsigned long protected, old_protected;
	unsigned long low;
	long delta;

	if (!c->parent)
		return;

	if (c->min || atomic_long_read(&c->min_usage)) {
		protected = min(usage, c->min);
		old_protected = atomic_long_xchg(&c->min_usage, protected);
		delta = protected - old_protected;
		if (delta)
			atomic_long_add(delta, &c->parent->children_min_usage);
	}

	low = READ_ONCE(c->low);
	if (low || atomic_long_read(&c->low_usage)) {
		protected = min(usage, low);
		old_protected = atomic_long_xchg(&c->low_usage, protected);
		delta = protected - old_protected;
		if (delta)
			atomic_long_add(delta, &c->parent->children_low_usage);
	}
}

/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
	long new;

	new = atomic_long_sub_return(nr_pages, &counter->usage);
	propagate_protected_usage(counter, new);
	/* More uncharges than charges? */
	WARN_ON_ONCE(new < 0);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;

		new = atomic_long_add_return(nr_pages, &c->usage);
		propagate_protected_usage(c, new);
		/*
		 * This is indeed racy, but we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
}
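
/*
 * Illustrative sketch only, not part of the original file: how a caller
 * might pair the unconditional charge above with an uncharge once the
 * memory is released.  example_force_charge() is a made-up name; real
 * users (e.g. the memory controller) do this around their own object
 * lifetimes.
 */
static __maybe_unused void example_force_charge(struct page_counter *counter,
						unsigned long nr_pages)
{
	/* Account nr_pages against @counter and all of its ancestors. */
	page_counter_charge(counter, nr_pages);

	/* ... the caller uses the memory here ... */

	/* Give the pages back, walking the same hierarchy. */
	page_counter_uncharge(counter, nr_pages);
}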

/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points to the first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
			     unsigned long nr_pages,
			     struct page_counter **fail)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;
		/*
		 * Charge speculatively to avoid an expensive CAS.  If
		 * a bigger charge fails, it might falsely lock out a
		 * racing smaller charge and send it into reclaim
		 * early, but the error is limited to the difference
		 * between the two sizes, which is less than 2M/4M in
		 * case of a THP locking out a regular page charge.
		 *
		 * The atomic_long_add_return() implies a full memory
		 * barrier between incrementing the count and reading
		 * the limit.  When racing with page_counter_set_max(),
		 * we either see the new limit or the setter sees the
		 * counter has changed and retries.
		 */
		new = atomic_long_add_return(nr_pages, &c->usage);
		if (new > c->max) {
			atomic_long_sub(nr_pages, &c->usage);
			propagate_protected_usage(c, new);
			/*
			 * This is racy, but we can live with some
			 * inaccuracy in the failcnt.
			 */
			c->failcnt++;
			*fail = c;
			goto failed;
		}
		propagate_protected_usage(c, new);
		/*
		 * Just like with failcnt, we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
	return true;

failed:
	for (c = counter; c != *fail; c = c->parent)
		page_counter_cancel(c, nr_pages);

	return false;
}
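
/*
 * Illustrative sketch only, not part of the original file: the pattern
 * callers typically build around page_counter_try_charge(), retrying
 * after reclaiming from the hierarchy level that hit its limit.
 * example_charge_with_retry() and the retry bound are made up for the
 * example; the reclaim step itself is only hinted at in a comment.
 */
static __maybe_unused int example_charge_with_retry(struct page_counter *counter,
						    unsigned long nr_pages)
{
	struct page_counter *over_limit;
	int retries = 3;	/* arbitrary bound for the sketch */

	while (!page_counter_try_charge(counter, nr_pages, &over_limit)) {
		if (!retries--)
			return -ENOMEM;
		/*
		 * @over_limit is the first counter in the hierarchy
		 * that hit its limit; a real caller would reclaim
		 * pages from that subtree before trying again.
		 */
		cond_resched();
	}
	return 0;
}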

/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent)
		page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_set_max - set the maximum number of pages allowed
 * @counter: counter
 * @nr_pages: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
{
	for (;;) {
		unsigned long old;
		long usage;

		/*
		 * Update the limit while making sure that it's not
		 * below the concurrently-changing counter value.
		 *
		 * The xchg implies two full memory barriers before
		 * and after, so the read-swap-read is ordered and
		 * ensures coherency with page_counter_try_charge():
		 * that function modifies the count before checking
		 * the limit, so if it sees the old limit, we see the
		 * modified counter and retry.
		 */
		usage = atomic_long_read(&counter->usage);

		if (usage > nr_pages)
			return -EBUSY;

		old = xchg(&counter->max, nr_pages);

		if (atomic_long_read(&counter->usage) <= usage)
			return 0;

		counter->max = old;
		cond_resched();
	}
}
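
/*
 * Illustrative sketch only, not part of the original file: shrinking a
 * limit with page_counter_set_max().  -EBUSY means usage is still above
 * the requested limit, so a real caller (e.g. a cgroup "max" write
 * handler) would reclaim from the counter's subtree and retry.  Callers
 * must also serialize against each other, which the sketch leaves out;
 * example_shrink_max() and the retry bound are made up.
 */
static __maybe_unused int example_shrink_max(struct page_counter *counter,
					     unsigned long new_max)
{
	int retries = 5;	/* arbitrary bound for the sketch */
	int ret;

	do {
		ret = page_counter_set_max(counter, new_max);
		if (ret != -EBUSY)
			break;
		/* Reclaim pages charged to @counter here, then retry. */
		cond_resched();
	} while (retries--);

	return ret;
}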

/**
 * page_counter_set_min - set the amount of hard-protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	counter->min = nr_pages;

	for (c = counter; c; c = c->parent)
		propagate_protected_usage(c, atomic_long_read(&c->usage));
}

/**
 * page_counter_set_low - set the amount of best-effort protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	WRITE_ONCE(counter->low, nr_pages);

	for (c = counter; c; c = c->parent)
		propagate_protected_usage(c, atomic_long_read(&c->usage));
}
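
/*
 * Illustrative sketch only, not part of the original file: wiring up a
 * parent/child pair and granting the child protected memory.  The
 * function name and the page counts are made up; page_counter_init()
 * is the real initializer from <linux/page_counter.h>.
 */
static __maybe_unused void example_setup_protection(struct page_counter *parent,
						    struct page_counter *child)
{
	page_counter_init(parent, NULL);
	page_counter_init(child, parent);

	/* 512 pages the child is always entitled to keep. */
	page_counter_set_min(child, 512);
	/* Best-effort protection for the first 1024 pages of usage. */
	page_counter_set_low(child, 1024);
}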

/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
			  unsigned long *nr_pages)
{
	char *end;
	u64 bytes;

	if (!strcmp(buf, max)) {
		*nr_pages = PAGE_COUNTER_MAX;
		return 0;
	}

	bytes = memparse(buf, &end);
	if (*end != '\0')
		return -EINVAL;

	*nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

	return 0;
}
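
/*
 * Illustrative sketch only, not part of the original file: how a cgroup
 * style interface file might combine page_counter_memparse() and
 * page_counter_set_max().  Writing "max" lifts the limit to
 * PAGE_COUNTER_MAX, anything else is parsed as bytes ("64M", "1G", ...)
 * and converted to pages.  example_write_max() is a made-up name and
 * the required serialization of page_counter_set_max() callers is
 * omitted.
 */
static __maybe_unused int example_write_max(struct page_counter *counter,
					    const char *buf)
{
	unsigned long nr_pages;
	int err;

	err = page_counter_memparse(buf, "max", &nr_pages);
	if (err)
		return err;

	return page_counter_set_max(counter, nr_pages);
}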