{
int id, ret = -ENOMEM;
+ if (mem_cgroup_disabled())
+ return -ENOSYS;
+
down_write(&shrinker_rwsem);
/* This may call shrinker, so it must use down_read_trylock() */
id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
idr_remove(&shrinker_idr, id);
}
+/*
+ * Atomically fetch and zero the deferred-work counter that @memcg keeps for
+ * @shrinker on node @nid.
+ *
+ * The counter lives in the shrinker_info returned by
+ * shrinker_info_protected() and is indexed by shrinker->id.
+ *
+ * Returns the value the counter held before it was reset to 0.
+ */
+static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
+				   struct mem_cgroup *memcg)
+{
+	struct shrinker_info *si = shrinker_info_protected(memcg, nid);
+
+	return atomic_long_xchg(&si->nr_deferred[shrinker->id], 0);
+}
+
+/*
+ * Atomically add @nr to the deferred-work counter that @memcg keeps for
+ * @shrinker on node @nid.
+ *
+ * The counter lives in the shrinker_info returned by
+ * shrinker_info_protected() and is indexed by shrinker->id.
+ *
+ * Returns the counter value after the addition.
+ */
+static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
+				  struct mem_cgroup *memcg)
+{
+	struct shrinker_info *si = shrinker_info_protected(memcg, nid);
+
+	return atomic_long_add_return(nr, &si->nr_deferred[shrinker->id]);
+}
+
+/*
+ * Fold the deferred-work counts of @memcg into its parent.
+ *
+ * For every node and every shrinker id below shrinker_nr_max, the child's
+ * nr_deferred value is read and added to the matching counter of the parent.
+ * When parent_mem_cgroup() returns NULL, root_mem_cgroup is used as the
+ * destination instead.
+ *
+ * The child's counters are only read here; this function does not clear them.
+ * The whole walk runs with shrinker_rwsem held for read.
+ */
+void reparent_shrinker_deferred(struct mem_cgroup *memcg)
+{
+ int i, nid;
+ long nr;
+ struct mem_cgroup *parent;
+ struct shrinker_info *child_info, *parent_info;
+
+ parent = parent_mem_cgroup(memcg);
+ if (!parent)
+ parent = root_mem_cgroup;
+
+ /* Prevent concurrent expansion of shrinker_info while we walk it */
+ down_read(&shrinker_rwsem);
+ for_each_node(nid) {
+ child_info = shrinker_info_protected(memcg, nid);
+ parent_info = shrinker_info_protected(parent, nid);
+ for (i = 0; i < shrinker_nr_max; i++) {
+ nr = atomic_long_read(&child_info->nr_deferred[i]);
+ atomic_long_add(nr, &parent_info->nr_deferred[i]);
+ }
+ }
+ up_read(&shrinker_rwsem);
+}
+
static bool cgroup_reclaim(struct scan_control *sc)
{
return sc->target_mem_cgroup;
#else
+/*
+ * Stub for the #else branch: always reports -ENOSYS.
+ *
+ * prealloc_shrinker() treats -ENOSYS specially: rather than failing, it
+ * clears SHRINKER_MEMCG_AWARE from the shrinker's flags and carries on.
+ */
static int prealloc_memcg_shrinker(struct shrinker *shrinker)
{
- return 0;
+ return -ENOSYS;
}
+/* Stub for the #else branch: there is nothing to unregister, so do nothing. */
static void unregister_memcg_shrinker(struct shrinker *shrinker)
{
}
+/*
+ * Stub for the #else branch: ignores its arguments and always returns 0,
+ * i.e. it never reports any deferred work.
+ */
+static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
+ struct mem_cgroup *memcg)
+{
+ return 0;
+}
+
+/*
+ * Stub for the #else branch: ignores its arguments, records nothing, and
+ * always returns 0.
+ */
+static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
+ struct mem_cgroup *memcg)
+{
+ return 0;
+}
+
+/* #else variant: @sc is ignored and cgroup reclaim is never reported. */
static bool cgroup_reclaim(struct scan_control *sc)
{
return false;
}
#endif
+/*
+ * Fetch and zero the deferred-work count that applies to this shrink request.
+ *
+ * The node index is sc->nid for SHRINKER_NUMA_AWARE shrinkers and 0 for all
+ * others. When the request carries a memcg and the shrinker is
+ * SHRINKER_MEMCG_AWARE, the memcg's counter is exchanged; otherwise the
+ * shrinker's own nr_deferred[] entry is.
+ *
+ * Returns the count that was pending before it was reset to 0.
+ */
+static long xchg_nr_deferred(struct shrinker *shrinker,
+			     struct shrink_control *sc)
+{
+	bool memcg_aware = shrinker->flags & SHRINKER_MEMCG_AWARE;
+	int nid = (shrinker->flags & SHRINKER_NUMA_AWARE) ? sc->nid : 0;
+
+	/* No memcg in play: fall back to the shrinker-global counter. */
+	if (!sc->memcg || !memcg_aware)
+		return atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
+
+	return xchg_nr_deferred_memcg(nid, shrinker, sc->memcg);
+}
+
+
+/*
+ * Add @nr to the deferred-work count that applies to this shrink request.
+ *
+ * The node index is sc->nid for SHRINKER_NUMA_AWARE shrinkers and 0 for all
+ * others. When the request carries a memcg and the shrinker is
+ * SHRINKER_MEMCG_AWARE, the memcg's counter is updated; otherwise the
+ * shrinker's own nr_deferred[] entry is.
+ *
+ * Returns the counter value after the addition.
+ */
+static long add_nr_deferred(long nr, struct shrinker *shrinker,
+			    struct shrink_control *sc)
+{
+	bool memcg_aware = shrinker->flags & SHRINKER_MEMCG_AWARE;
+	int nid = (shrinker->flags & SHRINKER_NUMA_AWARE) ? sc->nid : 0;
+
+	/* No memcg in play: fall back to the shrinker-global counter. */
+	if (!sc->memcg || !memcg_aware)
+		return atomic_long_add_return(nr, &shrinker->nr_deferred[nid]);
+
+	return add_nr_deferred_memcg(nr, nid, shrinker, sc->memcg);
+}
+
/*
* This misses isolated pages which are not accounted for to save counters.
* As the data only determines if reclaim or compaction continues, it is
*/
int prealloc_shrinker(struct shrinker *shrinker)
{
- unsigned int size = sizeof(*shrinker->nr_deferred);
+ unsigned int size;
+ int err;
+
+ if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
+ err = prealloc_memcg_shrinker(shrinker);
+ if (err != -ENOSYS)
+ return err;
+
+ shrinker->flags &= ~SHRINKER_MEMCG_AWARE;
+ }
+ size = sizeof(*shrinker->nr_deferred);
if (shrinker->flags & SHRINKER_NUMA_AWARE)
size *= nr_node_ids;
if (!shrinker->nr_deferred)
return -ENOMEM;
- if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
- if (prealloc_memcg_shrinker(shrinker))
- goto free_deferred;
- }
-
return 0;
-
-free_deferred:
- kfree(shrinker->nr_deferred);
- shrinker->nr_deferred = NULL;
- return -ENOMEM;
}
void free_prealloced_shrinker(struct shrinker *shrinker)
{
- if (!shrinker->nr_deferred)
- return;
-
if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
down_write(&shrinker_rwsem);
unregister_memcg_shrinker(shrinker);
up_write(&shrinker_rwsem);
+ return;
}
kfree(shrinker->nr_deferred);
long freeable;
long nr;
long new_nr;
- int nid = shrinkctl->nid;
long batch_size = shrinker->batch ? shrinker->batch
: SHRINK_BATCH;
long scanned = 0, next_deferred;
- if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
- nid = 0;
-
freeable = shrinker->count_objects(shrinker, shrinkctl);
if (freeable == 0 || freeable == SHRINK_EMPTY)
return freeable;
* and zero it so that other concurrent shrinker invocations
* don't also do this scanning work.
*/
- nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
+ nr = xchg_nr_deferred(shrinker, shrinkctl);
- total_scan = nr;
if (shrinker->seeks) {
delta = freeable >> priority;
delta *= 4;
delta = freeable / 2;
}
+ total_scan = nr >> priority;
total_scan += delta;
- if (total_scan < 0) {
- pr_err("shrink_slab: %pS negative objects to delete nr=%ld\n",
- shrinker->scan_objects, total_scan);
- total_scan = freeable;
- next_deferred = nr;
- } else
- next_deferred = total_scan;
-
- /*
- * We need to avoid excessive windup on filesystem shrinkers
- * due to large numbers of GFP_NOFS allocations causing the
- * shrinkers to return -1 all the time. This results in a large
- * nr being built up so when a shrink that can do some work
- * comes along it empties the entire cache due to nr >>>
- * freeable. This is bad for sustaining a working set in
- * memory.
- *
- * Hence only allow the shrinker to scan the entire cache when
- * a large delta change is calculated directly.
- */
- if (delta < freeable / 4)
- total_scan = min(total_scan, freeable / 2);
-
- /*
- * Avoid risking looping forever due to too large nr value:
- * never try to free more than twice the estimate number of
- * freeable entries.
- */
- if (total_scan > freeable * 2)
- total_scan = freeable * 2;
+ total_scan = min(total_scan, (2 * freeable));
trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
freeable, delta, total_scan, priority);
cond_resched();
}
- if (next_deferred >= scanned)
- next_deferred -= scanned;
- else
- next_deferred = 0;
+ /*
+ * The deferred work is increased by any new work (delta) that wasn't
+ * done, decreased by old deferred work that was done now.
+ *
+ * And it is capped to two times of the freeable items.
+ */
+ next_deferred = max_t(long, (nr + delta - scanned), 0);
+ next_deferred = min(next_deferred, (2 * freeable));
+
/*
* move the unused scan count back into the shrinker in a
- * manner that handles concurrent updates. If we exhausted the
- * scan, there is no need to do an update.
+ * manner that handles concurrent updates.
*/
- if (next_deferred > 0)
- new_nr = atomic_long_add_return(next_deferred,
- &shrinker->nr_deferred[nid]);
- else
- new_nr = atomic_long_read(&shrinker->nr_deferred[nid]);
+ new_nr = add_nr_deferred(next_deferred, shrinker, shrinkctl);
trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan);
return freed;
{
unsigned int alloc_order, reclaim_order;
unsigned int highest_zoneidx = MAX_NR_ZONES - 1;
- pg_data_t *pgdat = (pg_data_t*)p;
+ pg_data_t *pgdat = (pg_data_t *)p;
struct task_struct *tsk = current;
const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);