#endif
#endif
-/*
- * Returns true if any of the specified slub_debug flags is enabled for the
- * cache. Use only for flags parsed by setup_slub_debug() as it also enables
- * the static key.
- */
-static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t flags)
-{
- VM_WARN_ON_ONCE(!(flags & SLAB_DEBUG_FLAGS));
-#ifdef CONFIG_SLUB_DEBUG
- if (static_branch_unlikely(&slub_debug_enabled))
- return s->flags & flags;
-#endif
- return false;
-}
-
static inline bool kmem_cache_debug(struct kmem_cache *s)
{
return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
#ifdef CONFIG_SYSFS
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
-static void memcg_propagate_slab_attrs(struct kmem_cache *s);
-static void sysfs_slab_remove(struct kmem_cache *s);
#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
{ return 0; }
-static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
-static inline void sysfs_slab_remove(struct kmem_cache *s) { }
#endif
static inline void stat(const struct kmem_cache *s, enum stat_item si)
__p < (__addr) + (__objects) * (__s)->size; \
__p += (__s)->size)
-/* Determine object index from a given position */
-static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr)
-{
- return (kasan_reset_tag(p) - addr) / s->size;
-}
-
static inline unsigned int order_objects(unsigned int order, unsigned int size)
{
return ((unsigned int)PAGE_SIZE << order) / size;
bitmap_zero(object_map, page->objects);
for (p = page->freelist; p; p = get_freepointer(s, p))
- set_bit(slab_index(p, s, addr), object_map);
+ set_bit(__obj_to_index(s, addr, p), object_map);
return object_map;
}
static void put_map(unsigned long *map) __releases(&object_map_lock)
{
VM_BUG_ON(map != object_map);
- lockdep_assert_held(&object_map_lock);
-
spin_unlock(&object_map_lock);
}
#endif
}
-static void print_tracking(struct kmem_cache *s, void *object)
+void print_tracking(struct kmem_cache *s, void *object)
{
unsigned long pr_time = jiffies;
if (!(s->flags & SLAB_STORE_USER))
static void setup_object_debug(struct kmem_cache *s, struct page *page,
void *object)
{
- if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
+ if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))
return;
init_object(s, object, SLUB_RED_INACTIVE);
static
void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
{
- if (!(s->flags & SLAB_POISON))
+ if (!kmem_cache_debug_flags(s, SLAB_POISON))
return;
metadata_access_enable();
if (!(s->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(x, s->object_size);
+ /* Use KCSAN to help debug racy use-after-free. */
+ if (!(s->flags & SLAB_TYPESAFE_BY_RCU))
+ __kcsan_check_access(x, s->object_size,
+ KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
+
/* KASAN might put x into memory quarantine, delaying its reuse */
return kasan_slab_free(s, x, _RET_IP_);
}
else
page = __alloc_pages_node(node, flags, order);
- if (page && charge_slab_page(page, flags, order, s)) {
- __free_pages(page, order);
- page = NULL;
- }
+ if (page)
+ account_slab_page(page, order, s);
return page;
}
int order = compound_order(page);
int pages = 1 << order;
- if (s->flags & SLAB_CONSISTENCY_CHECKS) {
+ if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
void *p;
slab_pad_check(s, page);
page->mapping = NULL;
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += pages;
- uncharge_slab_page(page, order, s);
+ unaccount_slab_page(page, order, s);
__free_pages(page, order);
}
struct kmem_cache_cpu *c;
struct page *page;
unsigned long tid;
+ struct obj_cgroup *objcg = NULL;
- s = slab_pre_alloc_hook(s, gfpflags);
+ s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
if (!s)
return NULL;
redo:
if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
memset(object, 0, s->object_size);
- slab_post_alloc_hook(s, gfpflags, 1, &object);
+ slab_post_alloc_hook(s, objcg, gfpflags, 1, &object);
return object;
}
void *tail_obj = tail ? : head;
struct kmem_cache_cpu *c;
unsigned long tid;
+
+ memcg_slab_free_hook(s, page, head);
redo:
/*
* Determine the currently cpus per cpu slab.
{
struct kmem_cache_cpu *c;
int i;
+ struct obj_cgroup *objcg = NULL;
/* memcg and kmem_cache debug support */
- s = slab_pre_alloc_hook(s, flags);
+ s = slab_pre_alloc_hook(s, &objcg, size, flags);
if (unlikely(!s))
return false;
/*
}
/* memcg and kmem_cache debug support */
- slab_post_alloc_hook(s, flags, size, p);
+ slab_post_alloc_hook(s, objcg, flags, size, p);
return i;
error:
local_irq_enable();
- slab_post_alloc_hook(s, flags, i, p);
+ slab_post_alloc_hook(s, objcg, flags, i, p);
__kmem_cache_free_bulk(s, i, p);
return 0;
}
*/
size = ALIGN(size, s->align);
s->size = size;
+ s->reciprocal_size = reciprocal_value(size);
if (forced_order >= 0)
order = forced_order;
else
map = get_map(s, page);
for_each_object(p, s, addr, page->objects) {
- if (!test_bit(slab_index(p, s, addr), map)) {
+ if (!test_bit(__obj_to_index(s, addr, p), map)) {
pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
print_tracking(s, p);
}
if (n->nr_partial || slabs_node(s, node))
return 1;
}
- sysfs_slab_remove(s);
return 0;
}
page = alloc_pages_node(node, flags, order);
if (page) {
ptr = page_address(page);
- mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
- 1 << order);
+ mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
+ PAGE_SIZE << order);
}
return kmalloc_large_node_hook(ptr, size, flags);
BUG_ON(!PageCompound(page));
kfree_hook(object);
- mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
- -(1 << order));
+ mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
+ -(PAGE_SIZE << order));
__free_pages(page, order);
return;
}
return ret;
}
-#ifdef CONFIG_MEMCG
-void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s)
-{
- /*
- * Called with all the locks held after a sched RCU grace period.
- * Even if @s becomes empty after shrinking, we can't know that @s
- * doesn't have allocations already in-flight and thus can't
- * destroy @s until the associated memcg is released.
- *
- * However, let's remove the sysfs files for empty caches here.
- * Each cache has a lot of interface files which aren't
- * particularly useful for empty draining caches; otherwise, we can
- * easily end up with millions of unnecessary sysfs files on
- * systems which have a lot of memory and transient cgroups.
- */
- if (!__kmem_cache_shrink(s))
- sysfs_slab_remove(s);
-}
-
-void __kmemcg_cache_deactivate(struct kmem_cache *s)
-{
- /*
- * Disable empty slabs caching. Used to avoid pinning offline
- * memory cgroups by kmem pages that can be freed.
- */
- slub_set_cpu_partial(s, 0);
- s->min_partial = 0;
-}
-#endif /* CONFIG_MEMCG */
-
static int slab_mem_going_offline_callback(void *arg)
{
struct kmem_cache *s;
p->slab_cache = s;
#endif
}
- slab_init_memcg_params(s);
list_add(&s->list, &slab_caches);
- memcg_link_cache(s, NULL);
return s;
}
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
slab_flags_t flags, void (*ctor)(void *))
{
- struct kmem_cache *s, *c;
+ struct kmem_cache *s;
s = find_mergeable(size, align, flags, name, ctor);
if (s) {
s->object_size = max(s->object_size, size);
s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
- for_each_memcg_cache(c, s) {
- c->object_size = s->object_size;
- c->inuse = max(c->inuse, ALIGN(size, sizeof(void *)));
- }
-
if (sysfs_slab_alias(s, name)) {
s->refcount--;
s = NULL;
if (slab_state <= UP)
return 0;
- memcg_propagate_slab_attrs(s);
err = sysfs_slab_add(s);
if (err)
__kmem_cache_release(s);
/* Now we know that a valid freelist exists */
map = get_map(s, page);
for_each_object(p, s, addr, page->objects) {
- u8 val = test_bit(slab_index(p, s, addr), map) ?
+ u8 val = test_bit(__obj_to_index(s, addr, p), map) ?
SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
if (!check_object(s, page, p, val))
map = get_map(s, page);
for_each_object(p, s, addr, page->objects)
- if (!test_bit(slab_index(p, s, addr), map))
+ if (!test_bit(__obj_to_index(s, addr, p), map))
add_location(t, s, get_track(s, p, alloc));
put_map(map);
}
const char *buf, size_t length)
{
if (buf[0] == '1')
- kmem_cache_shrink_all(s);
+ kmem_cache_shrink(s);
else
return -EINVAL;
return length;
return -EIO;
err = attribute->store(s, buf, len);
-#ifdef CONFIG_MEMCG
- if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
- struct kmem_cache *c;
-
- mutex_lock(&slab_mutex);
- if (s->max_attr_size < len)
- s->max_attr_size = len;
-
- /*
- * This is a best effort propagation, so this function's return
- * value will be determined by the parent cache only. This is
- * basically because not all attributes will have a well
- * defined semantics for rollbacks - most of the actions will
- * have permanent effects.
- *
- * Returning the error value of any of the children that fail
- * is not 100 % defined, in the sense that users seeing the
- * error code won't be able to know anything about the state of
- * the cache.
- *
- * Only returning the error code for the parent cache at least
- * has well defined semantics. The cache being written to
- * directly either failed or succeeded, in which case we loop
- * through the descendants with best-effort propagation.
- */
- for_each_memcg_cache(c, s)
- attribute->store(c, buf, len);
- mutex_unlock(&slab_mutex);
- }
-#endif
return err;
}
-static void memcg_propagate_slab_attrs(struct kmem_cache *s)
-{
-#ifdef CONFIG_MEMCG
- int i;
- char *buffer = NULL;
- struct kmem_cache *root_cache;
-
- if (is_root_cache(s))
- return;
-
- root_cache = s->memcg_params.root_cache;
-
- /*
- * This mean this cache had no attribute written. Therefore, no point
- * in copying default values around
- */
- if (!root_cache->max_attr_size)
- return;
-
- for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
- char mbuf[64];
- char *buf;
- struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
- ssize_t len;
-
- if (!attr || !attr->store || !attr->show)
- continue;
-
- /*
- * It is really bad that we have to allocate here, so we will
- * do it only as a fallback. If we actually allocate, though,
- * we can just use the allocated buffer until the end.
- *
- * Most of the slub attributes will tend to be very small in
- * size, but sysfs allows buffers up to a page, so they can
- * theoretically happen.
- */
- if (buffer)
- buf = buffer;
- else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf) &&
- !IS_ENABLED(CONFIG_SLUB_STATS))
- buf = mbuf;
- else {
- buffer = (char *) get_zeroed_page(GFP_KERNEL);
- if (WARN_ON(!buffer))
- continue;
- buf = buffer;
- }
-
- len = attr->show(root_cache, buf);
- if (len > 0)
- attr->store(s, buf, len);
- }
-
- if (buffer)
- free_page((unsigned long)buffer);
-#endif /* CONFIG_MEMCG */
-}
-
static void kmem_cache_release(struct kobject *k)
{
slab_kmem_cache_release(to_slab(k));
static inline struct kset *cache_kset(struct kmem_cache *s)
{
-#ifdef CONFIG_MEMCG
- if (!is_root_cache(s))
- return s->memcg_params.root_cache->memcg_kset;
-#endif
return slab_kset;
}
return name;
}
-static void sysfs_slab_remove_workfn(struct work_struct *work)
-{
- struct kmem_cache *s =
- container_of(work, struct kmem_cache, kobj_remove_work);
-
- if (!s->kobj.state_in_sysfs)
- /*
- * For a memcg cache, this may be called during
- * deactivation and again on shutdown. Remove only once.
- * A cache is never shut down before deactivation is
- * complete, so no need to worry about synchronization.
- */
- goto out;
-
-#ifdef CONFIG_MEMCG
- kset_unregister(s->memcg_kset);
-#endif
-out:
- kobject_put(&s->kobj);
-}
-
static int sysfs_slab_add(struct kmem_cache *s)
{
int err;
struct kset *kset = cache_kset(s);
int unmergeable = slab_unmergeable(s);
- INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
-
if (!kset) {
kobject_init(&s->kobj, &slab_ktype);
return 0;
if (err)
goto out_del_kobj;
-#ifdef CONFIG_MEMCG
- if (is_root_cache(s) && memcg_sysfs_enabled) {
- s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
- if (!s->memcg_kset) {
- err = -ENOMEM;
- goto out_del_kobj;
- }
- }
-#endif
-
if (!unmergeable) {
/* Setup first alias */
sysfs_slab_alias(s, s->name);
goto out;
}
-static void sysfs_slab_remove(struct kmem_cache *s)
-{
- if (slab_state < FULL)
- /*
- * Sysfs has not been setup yet so no need to remove the
- * cache from sysfs.
- */
- return;
-
- kobject_get(&s->kobj);
- schedule_work(&s->kobj_remove_work);
-}
-
void sysfs_slab_unlink(struct kmem_cache *s)
{
if (slab_state >= FULL)