mm: memcontrol: fix memory.low proportional distribution
mm/page_counter.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

static void propagate_protected_usage(struct page_counter *c,
                                      unsigned long usage)
{
        unsigned long protected, old_protected;
        long delta;

        if (!c->parent)
                return;

        if (c->min || atomic_long_read(&c->min_usage)) {
                protected = min(usage, c->min);
                old_protected = atomic_long_xchg(&c->min_usage, protected);
                delta = protected - old_protected;
                if (delta)
                        atomic_long_add(delta, &c->parent->children_min_usage);
        }

        if (c->low || atomic_long_read(&c->low_usage)) {
                protected = min(usage, c->low);
                old_protected = atomic_long_xchg(&c->low_usage, protected);
                delta = protected - old_protected;
                if (delta)
                        atomic_long_add(delta, &c->parent->children_low_usage);
        }
}
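
/*
 * Worked example (illustrative only, with made-up numbers): if c->min
 * is 8 pages and the new usage is 12, protected becomes min(12, 8) = 8.
 * If min_usage previously held 5, the delta of +3 is added to the
 * parent's children_min_usage.  Those children_min_usage /
 * children_low_usage sums are what the memcg side (e.g.
 * effective_protection() in mm/memcontrol.c) uses to distribute a
 * parent's protection among its children in proportion to their
 * protected usage.
 */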

/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
        long new;

        new = atomic_long_sub_return(nr_pages, &counter->usage);
        propagate_protected_usage(counter, new);
        /* More uncharges than charges? */
        WARN_ON_ONCE(new < 0);
}
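
/*
 * Unlike page_counter_uncharge() below, this adjusts only the one
 * counter it is given.  That makes it suitable for unwinding a
 * partially applied hierarchical charge, which is exactly how the
 * failure path of page_counter_try_charge() uses it.
 */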

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        for (c = counter; c; c = c->parent) {
                long new;

                new = atomic_long_add_return(nr_pages, &c->usage);
                propagate_protected_usage(c, new);
                /*
                 * This is indeed racy, but we can live with some
                 * inaccuracy in the watermark.
                 */
                if (new > c->watermark)
                        c->watermark = new;
        }
}
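
/*
 * Usage sketch (illustrative only, not part of the original file): a
 * two-level hierarchy charged through the child.  page_counter_init()
 * and page_counter_read() come from <linux/page_counter.h>; the local
 * names and the function itself are hypothetical.
 */
static void __maybe_unused example_hierarchy_charge(void)
{
        struct page_counter parent = { }, child = { };

        page_counter_init(&parent, NULL);
        page_counter_init(&child, &parent);

        /* Forced charge: raises usage on child and parent, ignoring max. */
        page_counter_charge(&child, 32);

        /* Both levels now report 32 pages of usage. */
        WARN_ON(page_counter_read(&parent) != 32);
        WARN_ON(page_counter_read(&child) != 32);

        page_counter_uncharge(&child, 32);
}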

/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points to the first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
                             unsigned long nr_pages,
                             struct page_counter **fail)
{
        struct page_counter *c;

        for (c = counter; c; c = c->parent) {
                long new;
                /*
                 * Charge speculatively to avoid an expensive CAS.  If
                 * a bigger charge fails, it might falsely lock out a
                 * racing smaller charge and send it into reclaim
                 * early, but the error is limited to the difference
                 * between the two sizes, which is less than 2M/4M in
                 * case of a THP locking out a regular page charge.
                 *
                 * The atomic_long_add_return() implies a full memory
                 * barrier between incrementing the count and reading
                 * the limit.  When racing with page_counter_set_max(),
                 * we either see the new limit or the setter sees the
                 * counter has changed and retries.
                 */
                new = atomic_long_add_return(nr_pages, &c->usage);
                if (new > c->max) {
                        atomic_long_sub(nr_pages, &c->usage);
                        propagate_protected_usage(c, new);
                        /*
                         * This is racy, but we can live with some
                         * inaccuracy in the failcnt.
                         */
                        c->failcnt++;
                        *fail = c;
                        goto failed;
                }
                propagate_protected_usage(c, new);
                /*
                 * Just like with failcnt, we can live with some
                 * inaccuracy in the watermark.
                 */
                if (new > c->watermark)
                        c->watermark = new;
        }
        return true;

failed:
        for (c = counter; c != *fail; c = c->parent)
                page_counter_cancel(c, nr_pages);

        return false;
}
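
/*
 * Usage sketch (illustrative only, not part of the original file): the
 * typical call pattern, loosely modeled on the memcg charge path.  The
 * function name and the example_reclaim callback are hypothetical
 * stand-ins for whatever the caller does to make room.
 */
static bool __maybe_unused example_try_charge(struct page_counter *counter,
                                              unsigned long nr_pages,
                                              bool (*example_reclaim)(struct page_counter *))
{
        struct page_counter *over_limit;
        int retries = 3;        /* arbitrary bound for the sketch */

        while (!page_counter_try_charge(counter, nr_pages, &over_limit)) {
                /* @over_limit is the counter (here or an ancestor) at its max */
                if (retries-- <= 0 || !example_reclaim(over_limit))
                        return false;
        }
        return true;
}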

/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        for (c = counter; c; c = c->parent)
                page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_set_max - set the maximum number of pages allowed
 * @counter: counter
 * @nr_pages: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
{
        for (;;) {
                unsigned long old;
                long usage;

                /*
                 * Update the limit while making sure that it's not
                 * below the concurrently-changing counter value.
                 *
                 * The xchg implies two full memory barriers before
                 * and after, so the read-swap-read is ordered and
                 * ensures coherency with page_counter_try_charge():
                 * that function modifies the count before checking
                 * the limit, so if it sees the old limit, we see the
                 * modified counter and retry.
                 */
                usage = atomic_long_read(&counter->usage);

                if (usage > nr_pages)
                        return -EBUSY;

                old = xchg(&counter->max, nr_pages);

                if (atomic_long_read(&counter->usage) <= usage)
                        return 0;

                counter->max = old;
                cond_resched();
        }
}
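
/*
 * Usage sketch (illustrative only, not part of the original file):
 * lowering a limit while charges race in.  Real callers such as
 * mem_cgroup_resize_max() reclaim between attempts; example_shrink()
 * is a hypothetical stand-in for that step.
 */
static int __maybe_unused example_lower_limit(struct page_counter *counter,
                                              unsigned long limit,
                                              void (*example_shrink)(struct page_counter *))
{
        int retries = 5;        /* arbitrary bound for the sketch */
        int err;

        do {
                err = page_counter_set_max(counter, limit);
                if (err != -EBUSY)
                        break;
                example_shrink(counter);        /* try to push usage below @limit */
        } while (retries--);

        return err;
}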

/**
 * page_counter_set_min - set the amount of memory that is unconditionally protected
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        counter->min = nr_pages;

        for (c = counter; c; c = c->parent)
                propagate_protected_usage(c, atomic_long_read(&c->usage));
}

/**
 * page_counter_set_low - set the amount of memory that is protected on a best-effort basis
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        counter->low = nr_pages;

        for (c = counter; c; c = c->parent)
                propagate_protected_usage(c, atomic_long_read(&c->usage));
}
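
/*
 * Usage sketch (illustrative only, not part of the original file):
 * memory.min is hard protection, memory.low is best-effort; both
 * setters re-propagate the current usage so the parent's
 * children_min_usage / children_low_usage stay coherent.  The page
 * counts below are arbitrary.
 */
static void __maybe_unused example_protect(struct page_counter *counter)
{
        page_counter_set_min(counter, 256);     /* keep at least 256 pages unreclaimed */
        page_counter_set_low(counter, 1024);    /* prefer not to reclaim below 1024 pages */
}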

/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
                          unsigned long *nr_pages)
{
        char *end;
        u64 bytes;

        if (!strcmp(buf, max)) {
                *nr_pages = PAGE_COUNTER_MAX;
                return 0;
        }

        bytes = memparse(buf, &end);
        if (*end != '\0')
                return -EINVAL;

        *nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

        return 0;
}
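
/*
 * Usage sketch (illustrative only, not part of the original file):
 * parsing a limit string as the cgroup interface files do.  "max" maps
 * to PAGE_COUNTER_MAX, "512M" to 512M / PAGE_SIZE pages (131072 with
 * 4K pages), and anything memparse() cannot fully consume is rejected
 * with -EINVAL.  Callers are expected to strip trailing whitespace
 * first (compare memory_max_write() in mm/memcontrol.c).
 */
static int __maybe_unused example_parse_and_set_limit(const char *buf,
                                                      struct page_counter *counter)
{
        unsigned long nr_pages;
        int err;

        err = page_counter_memparse(buf, "max", &nr_pages);
        if (err)
                return err;

        return page_counter_set_max(counter, nr_pages);
}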