slub: make dead caches discard free slabs immediately
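
Cache destruction is reworked around two helpers: do_kmem_cache_shutdown()
unlinks a cache under slab_mutex and moves it onto a private release list,
and do_kmem_cache_release() frees everything on that list after a single
rcu_barrier() if any cache on it is SLAB_DESTROY_BY_RCU. memcg_params is
embedded in struct kmem_cache rather than allocated separately, the
per-memcg cache pointers move into an RCU-managed struct memcg_cache_array,
and child caches are linked on their root's memcg_params.list so that
kmem_cache_destroy() can walk them with for_each_memcg_cache_safe(). On
cgroup offline, memcg_deactivate_kmem_caches() unhooks each child cache and
calls __kmem_cache_shrink(c, true); the new 'deactivate' argument asks the
allocator to stop caching empty slabs for the dead cache, so its free slabs
are discarded immediately instead of lingering on partial lists.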
diff --git a/mm/slab_common.c b/mm/slab_common.c
index e03dd6f..1a1cc89 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -106,62 +106,67 @@ static inline int kmem_cache_sanity_check(const char *name, size_t size)
 #endif
 
 #ifdef CONFIG_MEMCG_KMEM
-static int memcg_alloc_cache_params(struct mem_cgroup *memcg,
-               struct kmem_cache *s, struct kmem_cache *root_cache)
+void slab_init_memcg_params(struct kmem_cache *s)
 {
-       size_t size;
+       s->memcg_params.is_root_cache = true;
+       INIT_LIST_HEAD(&s->memcg_params.list);
+       RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
+}
+
+static int init_memcg_params(struct kmem_cache *s,
+               struct mem_cgroup *memcg, struct kmem_cache *root_cache)
+{
+       struct memcg_cache_array *arr;
 
-       if (!memcg_kmem_enabled())
+       if (memcg) {
+               s->memcg_params.is_root_cache = false;
+               s->memcg_params.memcg = memcg;
+               s->memcg_params.root_cache = root_cache;
                return 0;
+       }
 
-       if (!memcg) {
-               size = offsetof(struct memcg_cache_params, memcg_caches);
-               size += memcg_limited_groups_array_size * sizeof(void *);
-       } else
-               size = sizeof(struct memcg_cache_params);
+       slab_init_memcg_params(s);
 
-       s->memcg_params = kzalloc(size, GFP_KERNEL);
-       if (!s->memcg_params)
-               return -ENOMEM;
+       if (!memcg_nr_cache_ids)
+               return 0;
 
-       if (memcg) {
-               s->memcg_params->memcg = memcg;
-               s->memcg_params->root_cache = root_cache;
-       } else
-               s->memcg_params->is_root_cache = true;
+       arr = kzalloc(sizeof(struct memcg_cache_array) +
+                     memcg_nr_cache_ids * sizeof(void *),
+                     GFP_KERNEL);
+       if (!arr)
+               return -ENOMEM;
 
+       RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr);
        return 0;
 }
 
-static void memcg_free_cache_params(struct kmem_cache *s)
+static void destroy_memcg_params(struct kmem_cache *s)
 {
-       kfree(s->memcg_params);
+       if (is_root_cache(s))
+               kfree(rcu_access_pointer(s->memcg_params.memcg_caches));
 }
 
-static int memcg_update_cache_params(struct kmem_cache *s, int num_memcgs)
+static int update_memcg_params(struct kmem_cache *s, int new_array_size)
 {
-       int size;
-       struct memcg_cache_params *new_params, *cur_params;
-
-       BUG_ON(!is_root_cache(s));
+       struct memcg_cache_array *old, *new;
 
-       size = offsetof(struct memcg_cache_params, memcg_caches);
-       size += num_memcgs * sizeof(void *);
+       if (!is_root_cache(s))
+               return 0;
 
-       new_params = kzalloc(size, GFP_KERNEL);
-       if (!new_params)
+       new = kzalloc(sizeof(struct memcg_cache_array) +
+                     new_array_size * sizeof(void *), GFP_KERNEL);
+       if (!new)
                return -ENOMEM;
 
-       cur_params = s->memcg_params;
-       memcpy(new_params->memcg_caches, cur_params->memcg_caches,
-              memcg_limited_groups_array_size * sizeof(void *));
-
-       new_params->is_root_cache = true;
-
-       rcu_assign_pointer(s->memcg_params, new_params);
-       if (cur_params)
-               kfree_rcu(cur_params, rcu_head);
+       old = rcu_dereference_protected(s->memcg_params.memcg_caches,
+                                       lockdep_is_held(&slab_mutex));
+       if (old)
+               memcpy(new->entries, old->entries,
+                      memcg_nr_cache_ids * sizeof(void *));
 
+       rcu_assign_pointer(s->memcg_params.memcg_caches, new);
+       if (old)
+               kfree_rcu(old, rcu);
        return 0;
 }
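
The size arithmetic in init_memcg_params() and update_memcg_params(),
together with the arr->entries[idx] accesses and kfree_rcu(old, rcu) above,
implies an array type along the following lines (a sketch inferred from
this file; the real definition lives in the slab headers):

	/*
	 * Assumed shape of the per-memcg cache array: an rcu_head for
	 * kfree_rcu() plus a flexible array of cache pointers indexed
	 * by memcg_cache_id().
	 */
	struct memcg_cache_array {
		struct rcu_head rcu;
		struct kmem_cache *entries[0];
	};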
 
@@ -169,34 +174,28 @@ int memcg_update_all_caches(int num_memcgs)
 {
        struct kmem_cache *s;
        int ret = 0;
-       mutex_lock(&slab_mutex);
 
+       mutex_lock(&slab_mutex);
        list_for_each_entry(s, &slab_caches, list) {
-               if (!is_root_cache(s))
-                       continue;
-
-               ret = memcg_update_cache_params(s, num_memcgs);
+               ret = update_memcg_params(s, num_memcgs);
                /*
                 * Instead of freeing the memory, we'll just leave the caches
                 * up to this point in an updated state.
                 */
                if (ret)
-                       goto out;
+                       break;
        }
-
-       memcg_update_array_size(num_memcgs);
-out:
        mutex_unlock(&slab_mutex);
        return ret;
 }
 #else
-static inline int memcg_alloc_cache_params(struct mem_cgroup *memcg,
-               struct kmem_cache *s, struct kmem_cache *root_cache)
+static inline int init_memcg_params(struct kmem_cache *s,
+               struct mem_cgroup *memcg, struct kmem_cache *root_cache)
 {
        return 0;
 }
 
-static inline void memcg_free_cache_params(struct kmem_cache *s)
+static inline void destroy_memcg_params(struct kmem_cache *s)
 {
 }
 #endif /* CONFIG_MEMCG_KMEM */
@@ -314,7 +313,7 @@ do_kmem_cache_create(char *name, size_t object_size, size_t size, size_t align,
        s->align = align;
        s->ctor = ctor;
 
-       err = memcg_alloc_cache_params(memcg, s, root_cache);
+       err = init_memcg_params(s, memcg, root_cache);
        if (err)
                goto out_free_cache;
 
@@ -330,8 +329,8 @@ out:
        return s;
 
 out_free_cache:
-       memcg_free_cache_params(s);
-       kfree(s);
+       destroy_memcg_params(s);
+       kmem_cache_free(kmem_cache, s);
        goto out;
 }
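
The switch from kfree() to kmem_cache_free(kmem_cache, s) matches the
allocation side: struct kmem_cache objects are carved from the dedicated
'kmem_cache' cache, not from kmalloc. The allocation earlier in
do_kmem_cache_create() presumably reads roughly as follows (a sketch of
context not shown in this hunk):

	/*
	 * Cache structs come from the 'kmem_cache' cache itself, so they
	 * must be freed with kmem_cache_free(), never kfree().
	 */
	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
	if (!s)
		goto out;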
 
@@ -369,6 +368,7 @@ kmem_cache_create(const char *name, size_t size, size_t align,
 
        get_online_cpus();
        get_online_mems();
+       memcg_get_cache_ids();
 
        mutex_lock(&slab_mutex);
 
@@ -407,6 +407,7 @@ kmem_cache_create(const char *name, size_t size, size_t align,
 out_unlock:
        mutex_unlock(&slab_mutex);
 
+       memcg_put_cache_ids();
        put_online_mems();
        put_online_cpus();
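
memcg_get_cache_ids()/memcg_put_cache_ids() bracket cache creation so that
memcg_nr_cache_ids, read by init_memcg_params() above, cannot grow while a
new root cache is being set up. A minimal sketch of such pinning, assuming
the memcg side guards the id space with a reader/writer semaphore (the
rwsem name here is an assumption):

	static DECLARE_RWSEM(memcg_cache_ids_sem);	/* assumed lock */

	void memcg_get_cache_ids(void)
	{
		/* block memcg_nr_cache_ids from growing underneath us */
		down_read(&memcg_cache_ids_sem);
	}

	void memcg_put_cache_ids(void)
	{
		up_read(&memcg_cache_ids_sem);
	}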
 
@@ -425,31 +426,91 @@ out_unlock:
 }
 EXPORT_SYMBOL(kmem_cache_create);
 
+static int do_kmem_cache_shutdown(struct kmem_cache *s,
+               struct list_head *release, bool *need_rcu_barrier)
+{
+       if (__kmem_cache_shutdown(s) != 0) {
+               printk(KERN_ERR "kmem_cache_destroy %s: "
+                      "Slab cache still has objects\n", s->name);
+               dump_stack();
+               return -EBUSY;
+       }
+
+       if (s->flags & SLAB_DESTROY_BY_RCU)
+               *need_rcu_barrier = true;
+
+#ifdef CONFIG_MEMCG_KMEM
+       if (!is_root_cache(s))
+               list_del(&s->memcg_params.list);
+#endif
+       list_move(&s->list, release);
+       return 0;
+}
+
+static void do_kmem_cache_release(struct list_head *release,
+                                 bool need_rcu_barrier)
+{
+       struct kmem_cache *s, *s2;
+
+       if (need_rcu_barrier)
+               rcu_barrier();
+
+       list_for_each_entry_safe(s, s2, release, list) {
+#ifdef SLAB_SUPPORTS_SYSFS
+               sysfs_slab_remove(s);
+#else
+               slab_kmem_cache_release(s);
+#endif
+       }
+}
+
 #ifdef CONFIG_MEMCG_KMEM
 /*
  * memcg_create_kmem_cache - Create a cache for a memory cgroup.
  * @memcg: The memory cgroup the new cache is for.
  * @root_cache: The parent of the new cache.
- * @memcg_name: The name of the memory cgroup (used for naming the new cache).
  *
  * This function attempts to create a kmem cache that will serve allocation
  * requests going from @memcg to @root_cache. The new cache inherits properties
  * from its parent.
  */
-struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
-                                          struct kmem_cache *root_cache,
-                                          const char *memcg_name)
+void memcg_create_kmem_cache(struct mem_cgroup *memcg,
+                            struct kmem_cache *root_cache)
 {
+       static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
+       struct cgroup_subsys_state *css = mem_cgroup_css(memcg);
+       struct memcg_cache_array *arr;
        struct kmem_cache *s = NULL;
        char *cache_name;
+       int idx;
 
        get_online_cpus();
        get_online_mems();
 
        mutex_lock(&slab_mutex);
 
+       /*
+        * The memory cgroup could have been deactivated while the cache
+        * creation work was pending.
+        */
+       if (!memcg_kmem_is_active(memcg))
+               goto out_unlock;
+
+       idx = memcg_cache_id(memcg);
+       arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
+                                       lockdep_is_held(&slab_mutex));
+
+       /*
+        * Since per-memcg caches are created asynchronously on first
+        * allocation (see memcg_kmem_get_cache()), several threads can try to
+        * create the same cache, but only one of them may succeed.
+        */
+       if (arr->entries[idx])
+               goto out_unlock;
+
+       cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
        cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
-                              memcg_cache_id(memcg), memcg_name);
+                              css->id, memcg_name_buf);
        if (!cache_name)
                goto out_unlock;
 
@@ -457,49 +518,108 @@ struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
                                 root_cache->size, root_cache->align,
                                 root_cache->flags, root_cache->ctor,
                                 memcg, root_cache);
+       /*
+        * If we could not create a memcg cache, do not complain, because
+        * that's not critical at all as we can always proceed with the root
+        * cache.
+        */
        if (IS_ERR(s)) {
                kfree(cache_name);
-               s = NULL;
+               goto out_unlock;
        }
 
+       list_add(&s->memcg_params.list, &root_cache->memcg_params.list);
+
+       /*
+        * Since readers won't lock (see cache_from_memcg_idx()), we need a
+        * barrier here to ensure nobody will see the kmem_cache partially
+        * initialized.
+        */
+       smp_wmb();
+       arr->entries[idx] = s;
+
 out_unlock:
        mutex_unlock(&slab_mutex);
 
        put_online_mems();
        put_online_cpus();
-
-       return s;
 }
 
-static int memcg_cleanup_cache_params(struct kmem_cache *s)
+void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
 {
-       int rc;
+       int idx;
+       struct memcg_cache_array *arr;
+       struct kmem_cache *s, *c;
 
-       if (!s->memcg_params ||
-           !s->memcg_params->is_root_cache)
-               return 0;
+       idx = memcg_cache_id(memcg);
+
+       get_online_cpus();
+       get_online_mems();
 
-       mutex_unlock(&slab_mutex);
-       rc = __memcg_cleanup_cache_params(s);
        mutex_lock(&slab_mutex);
+       list_for_each_entry(s, &slab_caches, list) {
+               if (!is_root_cache(s))
+                       continue;
+
+               arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
+                                               lockdep_is_held(&slab_mutex));
+               c = arr->entries[idx];
+               if (!c)
+                       continue;
 
-       return rc;
+               __kmem_cache_shrink(c, true);
+               arr->entries[idx] = NULL;
+       }
+       mutex_unlock(&slab_mutex);
+
+       put_online_mems();
+       put_online_cpus();
 }
-#else
-static int memcg_cleanup_cache_params(struct kmem_cache *s)
+
+void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
 {
-       return 0;
+       LIST_HEAD(release);
+       bool need_rcu_barrier = false;
+       struct kmem_cache *s, *s2;
+
+       get_online_cpus();
+       get_online_mems();
+
+       mutex_lock(&slab_mutex);
+       list_for_each_entry_safe(s, s2, &slab_caches, list) {
+               if (is_root_cache(s) || s->memcg_params.memcg != memcg)
+                       continue;
+               /*
+                * The cgroup is about to be freed and therefore has no charges
+                * left. Hence, all its caches must be empty by now.
+                */
+               BUG_ON(do_kmem_cache_shutdown(s, &release, &need_rcu_barrier));
+       }
+       mutex_unlock(&slab_mutex);
+
+       put_online_mems();
+       put_online_cpus();
+
+       do_kmem_cache_release(&release, need_rcu_barrier);
 }
 #endif /* CONFIG_MEMCG_KMEM */
 
 void slab_kmem_cache_release(struct kmem_cache *s)
 {
+       destroy_memcg_params(s);
        kfree(s->name);
        kmem_cache_free(kmem_cache, s);
 }
 
 void kmem_cache_destroy(struct kmem_cache *s)
 {
+       struct kmem_cache *c, *c2;
+       LIST_HEAD(release);
+       bool need_rcu_barrier = false;
+       bool busy = false;
+
+       BUG_ON(!is_root_cache(s));
+
        get_online_cpus();
        get_online_mems();
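
The smp_wmb() before publishing arr->entries[idx] above pairs with the
dependency ordering on the lockless reader side. A sketch of what the
lookup mentioned in the comment, cache_from_memcg_idx(), plausibly looks
like (assumed shape, not part of this hunk):

	static inline struct kmem_cache *
	cache_from_memcg_idx(struct kmem_cache *s, int idx)
	{
		struct memcg_cache_array *arr;
		struct kmem_cache *cachep;

		/*
		 * rcu_dereference() orders the array load against the
		 * dependent entries[] load, pairing with the smp_wmb()
		 * in memcg_create_kmem_cache().
		 */
		rcu_read_lock();
		arr = rcu_dereference(s->memcg_params.memcg_caches);
		cachep = arr->entries[idx];	/* may be NULL */
		rcu_read_unlock();
		return cachep;
	}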
 
@@ -509,35 +629,21 @@ void kmem_cache_destroy(struct kmem_cache *s)
        if (s->refcount)
                goto out_unlock;
 
-       if (memcg_cleanup_cache_params(s) != 0)
-               goto out_unlock;
-
-       if (__kmem_cache_shutdown(s) != 0) {
-               printk(KERN_ERR "kmem_cache_destroy %s: "
-                      "Slab cache still has objects\n", s->name);
-               dump_stack();
-               goto out_unlock;
+       for_each_memcg_cache_safe(c, c2, s) {
+               if (do_kmem_cache_shutdown(c, &release, &need_rcu_barrier))
+                       busy = true;
        }
 
-       list_del(&s->list);
-
-       mutex_unlock(&slab_mutex);
-       if (s->flags & SLAB_DESTROY_BY_RCU)
-               rcu_barrier();
-
-       memcg_free_cache_params(s);
-#ifdef SLAB_SUPPORTS_SYSFS
-       sysfs_slab_remove(s);
-#else
-       slab_kmem_cache_release(s);
-#endif
-       goto out;
+       if (!busy)
+               do_kmem_cache_shutdown(s, &release, &need_rcu_barrier);
 
 out_unlock:
        mutex_unlock(&slab_mutex);
-out:
+
        put_online_mems();
        put_online_cpus();
+
+       do_kmem_cache_release(&release, need_rcu_barrier);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
@@ -554,7 +660,7 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
 
        get_online_cpus();
        get_online_mems();
-       ret = __kmem_cache_shrink(cachep);
+       ret = __kmem_cache_shrink(cachep, false);
        put_online_mems();
        put_online_cpus();
        return ret;
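
The new boolean distinguishes an ordinary shrink (false, as here) from the
deactivation of a dead cache (true, as in memcg_deactivate_kmem_caches()
above). For SLUB, which the subject line targets, the deactivate path can
be sketched as follows (an illustration of the idea; the actual change is
in mm/slub.c, not in this file):

	int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate)
	{
		if (deactivate) {
			/*
			 * Disable empty-slab caching for a dead cache so
			 * that free slabs go back to the page allocator
			 * at once instead of sitting on partial lists.
			 */
			s->cpu_partial = 0;
			s->min_partial = 0;
			/*
			 * s->cpu_partial is read locklessly on the fast
			 * path, so make the change globally visible.
			 */
			kick_all_cpus_sync();
		}
		/* ... usual shrink: free all empty slabs ... */
		return 0;
	}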
@@ -576,6 +682,9 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t siz
        s->name = name;
        s->size = s->object_size = size;
        s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
+
+       slab_init_memcg_params(s);
+
        err = __kmem_cache_create(s, flags);
 
        if (err)
@@ -855,16 +964,11 @@ memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
 {
        struct kmem_cache *c;
        struct slabinfo sinfo;
-       int i;
 
        if (!is_root_cache(s))
                return;
 
-       for_each_memcg_cache_index(i) {
-               c = cache_from_memcg_idx(s, i);
-               if (!c)
-                       continue;
-
+       for_each_memcg_cache(c, s) {
                memset(&sinfo, 0, sizeof(sinfo));
                get_slabinfo(c, &sinfo);
 
@@ -916,7 +1020,7 @@ int memcg_slab_show(struct seq_file *m, void *p)
 
        if (p == slab_caches.next)
                print_slabinfo_header(m);
-       if (!is_root_cache(s) && s->memcg_params->memcg == memcg)
+       if (!is_root_cache(s) && s->memcg_params.memcg == memcg)
                cache_show(s, m);
        return 0;
 }