* freepointer to be restored incorrectly.
*/
return (void *)((unsigned long)ptr ^ s->random ^
- (unsigned long)kasan_reset_tag((void *)ptr_addr));
+ swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
#else
return ptr;
#endif
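For illustration only, a standalone userspace sketch of the XOR-plus-byte-swap round trip used to obfuscate the freelist pointer; freelist_ptr_demo(), swab64_demo() and the constants are invented here, with the key standing in for s->random. Byte-swapping the storage address mixes its varying low bits against the high bits of the pointer being hidden instead of letting the nearly constant high bits cancel out:

/* Standalone userspace sketch, not kernel code. */
#include <inttypes.h>
#include <stdio.h>

/* plays the role of the kernel's swab() on a 64-bit value */
static uint64_t swab64_demo(uint64_t x)
{
	return __builtin_bswap64(x);
}

/* one transform both stores and loads: v ^ k ^ k == v */
static uint64_t freelist_ptr_demo(uint64_t key, uint64_t ptr, uint64_t ptr_addr)
{
	return ptr ^ key ^ swab64_demo(ptr_addr);
}

int main(void)
{
	uint64_t key  = 0x5a5a17c3d2e94b01ULL;	/* stands in for s->random */
	uint64_t next = 0xffff888012345678ULL;	/* next free object */
	uint64_t slot = 0xffff888012345640ULL;	/* where the pointer is stored */
	uint64_t stored  = freelist_ptr_demo(key, next, slot);
	uint64_t decoded = freelist_ptr_demo(key, stored, slot);

	printf("stored  0x%" PRIx64 "\ndecoded 0x%" PRIx64 "\n", stored, decoded);
	return 0;
}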
* not vanish from under us.
*/
static unsigned long *get_map(struct kmem_cache *s, struct page *page)
+ __acquires(&object_map_lock)
{
void *p;
void *addr = page_address(page);
return object_map;
}
-static void put_map(unsigned long *map)
+static void put_map(unsigned long *map) __releases(&object_map_lock)
{
VM_BUG_ON(map != object_map);
lockdep_assert_held(&object_map_lock);
spin_unlock(&object_map_lock);
}
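__acquires()/__releases() are sparse context annotations and compile away under a normal build. A minimal userspace sketch of the same acquire-in-one-helper, release-in-the-other pairing, assuming simplified stand-in macros and a pthread mutex in place of the spinlock:

/* Userspace sketch; the annotation macros are simplified stand-ins. */
#include <pthread.h>
#include <stdio.h>

#define __acquires(x)	/* checked by sparse in the kernel, a no-op here */
#define __releases(x)

static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long object_map_demo[4];

/* hands out the shared map with the lock held ... */
static unsigned long *get_map_demo(void) __acquires(&map_lock)
{
	pthread_mutex_lock(&map_lock);
	return object_map_demo;
}

/* ... which the caller must return through here to drop the lock */
static void put_map_demo(unsigned long *map) __releases(&map_lock)
{
	if (map == object_map_demo)
		pthread_mutex_unlock(&map_lock);
}

int main(void)
{
	unsigned long *map = get_map_demo();

	map[0] = 1;
	printf("map[0] = %lu\n", map[0]);
	put_map_demo(map);
	return 0;
}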
+/*
+ * See comment in calculate_sizes().
+ */
+static inline bool freeptr_outside_object(struct kmem_cache *s)
+{
+ return s->offset >= s->inuse;
+}
+
+/*
+ * Return offset of the end of info block which is inuse + free pointer if
+ * not overlapping with object.
+ */
+static inline unsigned int get_info_end(struct kmem_cache *s)
+{
+ if (freeptr_outside_object(s))
+ return s->inuse + sizeof(void *);
+ else
+ return s->inuse;
+}
+
static struct track *get_track(struct kmem_cache *s, void *object,
enum track_item alloc)
{
struct track *p;
- if (s->offset)
- p = object + s->offset + sizeof(void *);
- else
- p = object + s->inuse;
+ p = object + get_info_end(s);
return p + alloc;
}
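A rough worked example of the new helpers (cache fields and sizes invented for illustration): the SLAB_STORE_USER track records land at get_info_end(), which is past the free pointer only when that pointer lives outside the object:

/* Userspace sketch; field values are invented for illustration. */
#include <stdio.h>

struct cache_demo {
	unsigned int inuse;	/* object size plus any in-object metadata */
	unsigned int offset;	/* where the free pointer is stored */
};

static int freeptr_outside_object_demo(const struct cache_demo *s)
{
	return s->offset >= s->inuse;
}

/* end of the info block: past the free pointer only when it sits outside
 * the object proper */
static unsigned int get_info_end_demo(const struct cache_demo *s)
{
	if (freeptr_outside_object_demo(s))
		return s->inuse + sizeof(void *);
	return s->inuse;
}

int main(void)
{
	/* e.g. a poisoned cache: free pointer appended after the object */
	struct cache_demo outside = { .inuse = 64, .offset = 64 };
	/* e.g. a plain cache: free pointer kept inside, near the middle */
	struct cache_demo inside  = { .inuse = 64, .offset = 32 };

	/* the alloc/free track records start right at the info end */
	printf("outside: tracks start at byte %u\n", get_info_end_demo(&outside));
	printf("inside:  tracks start at byte %u\n", get_info_end_demo(&inside));
	return 0;
}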
va_end(args);
}
+static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
+ void **freelist, void *nextfree)
+{
+ if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
+ !check_valid_pointer(s, page, nextfree)) {
+ object_err(s, page, *freelist, "Freechain corrupt");
+ *freelist = NULL;
+ slab_fix(s, "Isolate corrupted freechain");
+ return true;
+ }
+
+ return false;
+}
+
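For illustration, a userspace sketch of the same idea, assuming a toy slab and a check_valid_pointer()-style bounds-and-alignment test; once a link points outside the slab, the rest of the chain is isolated:

/* Userspace sketch of the idea; toy slab, invented helper names. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define OBJ_SIZE	32
#define NR_OBJECTS	4

static unsigned char slab[OBJ_SIZE * NR_OBJECTS];

/* roughly what check_valid_pointer() enforces: within the slab and on an
 * object boundary */
static int valid_object(const void *p)
{
	uintptr_t off = (uintptr_t)p - (uintptr_t)slab;

	return off < sizeof(slab) && off % OBJ_SIZE == 0;
}

int main(void)
{
	void *freelist = &slab[0];
	int i;

	/* chain object 0 -> 1 -> 2 -> 3, then scribble over object 1's link */
	for (i = 0; i < NR_OBJECTS - 1; i++) {
		void *next = &slab[(i + 1) * OBJ_SIZE];

		memcpy(&slab[i * OBJ_SIZE], &next, sizeof(next));
	}
	memset(&slab[1 * OBJ_SIZE], 0x6b, sizeof(void *));

	while (freelist) {
		void *next;

		memcpy(&next, freelist, sizeof(next));
		if (next && !valid_object(next)) {
			/* everything from here on is suspect: cut the chain */
			printf("freechain corrupt at %p, isolating\n", freelist);
			freelist = NULL;
			break;
		}
		printf("object %p ok\n", freelist);
		freelist = next;
	}
	return 0;
}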
static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
{
unsigned int off; /* Offset of last byte */
print_section(KERN_ERR, "Redzone ", p + s->object_size,
s->inuse - s->object_size);
- if (s->offset)
- off = s->offset + sizeof(void *);
- else
- off = s->inuse;
+ off = get_info_end(s);
if (s->flags & SLAB_STORE_USER)
off += 2 * sizeof(struct track);
* object address
* Bytes of the object to be managed.
* If the freepointer may overlay the object then the free
- * pointer is the first word of the object.
+ * pointer is in the middle of the object.
*
* Poisoning uses 0x6b (POISON_FREE) and the last byte is
* 0xa5 (POISON_END)
static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
{
- unsigned long off = s->inuse; /* The end of info */
-
- if (s->offset)
- /* Freepointer is placed after the object. */
- off += sizeof(void *);
+ unsigned long off = get_info_end(s); /* The end of info */
if (s->flags & SLAB_STORE_USER)
/* We also have user information there */
check_pad_bytes(s, page, p);
}
- if (!s->offset && val == SLUB_RED_ACTIVE)
+ if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
/*
* Object and freepointer overlap. Cannot check
* freepointer while object is allocated.
static inline void dec_slabs_node(struct kmem_cache *s, int node,
int objects) {}
+static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
+ void **freelist, void *nextfree)
+{
+ return false;
+}
#endif /* CONFIG_SLUB_DEBUG */
/*
struct zonelist *zonelist;
struct zoneref *z;
struct zone *zone;
- enum zone_type high_zoneidx = gfp_zone(flags);
+ enum zone_type highest_zoneidx = gfp_zone(flags);
void *object;
unsigned int cpuset_mems_cookie;
do {
cpuset_mems_cookie = read_mems_allowed_begin();
zonelist = node_zonelist(mempolicy_slab_node(), flags);
- for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+ for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
struct kmem_cache_node *n;
n = get_node(s, zone_to_nid(zone));
#ifdef CONFIG_PREEMPTION
/*
- * Calculate the next globally unique transaction for disambiguiation
+ * Calculate the next globally unique transaction for disambiguation
* during cmpxchg. The transactions start with the cpu number and are then
* incremented by CONFIG_NR_CPUS.
*/
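A worked example of the tid scheme, mirroring the kernel's next_tid()/tid_to_cpu()/tid_to_event() helpers in userspace; the step is 8 here, standing in for the power-of-two rounding of CONFIG_NR_CPUS the kernel actually uses:

/* Userspace sketch only. */
#include <stdio.h>

#define TID_STEP_DEMO	8

static unsigned long next_tid_demo(unsigned long tid)
{
	return tid + TID_STEP_DEMO;
}

static unsigned int tid_to_cpu_demo(unsigned long tid)
{
	return tid % TID_STEP_DEMO;
}

static unsigned long tid_to_event_demo(unsigned long tid)
{
	return tid / TID_STEP_DEMO;
}

int main(void)
{
	/* cpu 3 starts at tid 3 and only ever produces 3, 11, 19, ... so its
	 * tids can never collide with those of another cpu */
	unsigned long tid = 3;
	int i;

	for (i = 0; i < 4; i++) {
		printf("tid %2lu -> cpu %u, event %lu\n",
		       tid, tid_to_cpu_demo(tid), tid_to_event_demo(tid));
		tid = next_tid_demo(tid);
	}
	return 0;
}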
void *prior;
unsigned long counters;
+ /*
+ * If 'nextfree' is invalid, it is possible that the object at
+ * 'freelist' is already corrupted. So isolate all objects
+ * starting at 'freelist'.
+ */
+ if (freelist_corrupted(s, page, &freelist, nextfree))
+ break;
+
do {
prior = page->freelist;
counters = page->counters;
struct kmem_cache_node *n = NULL, *n2 = NULL;
struct page *page, *discard_page = NULL;
- while ((page = c->partial)) {
+ while ((page = slub_percpu_partial(c))) {
struct page new;
struct page old;
- c->partial = page->next;
+ slub_set_percpu_partial(c, page);
n2 = get_node(s, page_to_nid(page));
if (n != n2) {
if (oldpage) {
pobjects = oldpage->pobjects;
pages = oldpage->pages;
- if (drain && pobjects > s->cpu_partial) {
+ if (drain && pobjects > slub_cpu_partial(s)) {
unsigned long flags;
/*
* partial array is full. Move the existing
} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
!= oldpage);
- if (unlikely(!s->cpu_partial)) {
+ if (unlikely(!slub_cpu_partial(s))) {
unsigned long flags;
local_irq_save(flags);
* 50% to keep some capacity around for frees.
*/
if (!kmem_cache_has_cpu_partial(s))
- s->cpu_partial = 0;
+ slub_set_cpu_partial(s, 0);
else if (s->size >= PAGE_SIZE)
- s->cpu_partial = 2;
+ slub_set_cpu_partial(s, 2);
else if (s->size >= 1024)
- s->cpu_partial = 6;
+ slub_set_cpu_partial(s, 6);
else if (s->size >= 256)
- s->cpu_partial = 13;
+ slub_set_cpu_partial(s, 13);
else
- s->cpu_partial = 30;
+ slub_set_cpu_partial(s, 30);
#endif
}
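A sketch of the resulting threshold ladder as a plain function of object size, assuming a PAGE_SIZE of 4096 and leaving out the !kmem_cache_has_cpu_partial() case, which forces the limit to 0:

/* Userspace sketch only. */
#include <stdio.h>

static unsigned int cpu_partial_for_size(unsigned int size)
{
	if (size >= 4096)
		return 2;
	else if (size >= 1024)
		return 6;
	else if (size >= 256)
		return 13;
	else
		return 30;
}

int main(void)
{
	unsigned int sizes[] = { 64, 256, 1024, 4096, 8192 };
	unsigned int i;

	/* larger objects get a smaller per-cpu partial budget so the lists
	 * cannot pin an outsized amount of memory */
	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("size %5u -> cpu_partial %2u\n",
		       sizes[i], cpu_partial_for_size(sizes[i]));
	return 0;
}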
{
slab_flags_t flags = s->flags;
unsigned int size = s->object_size;
+ unsigned int freepointer_area;
unsigned int order;
/*
* the possible location of the free pointer.
*/
size = ALIGN(size, sizeof(void *));
+ /*
+ * This is the area of the object where a freepointer can be
+ * safely written. If redzoning adds more to the inuse size, we
+ * can't use that portion for writing the freepointer, so
+ * s->offset must be limited within this for the general case.
+ */
+ freepointer_area = size;
#ifdef CONFIG_SLUB_DEBUG
/*
*
* This is the case if we do RCU, have a constructor or
* destructor or are poisoning the objects.
+ *
+ * The assumption that s->offset >= s->inuse means free
+ * pointer is outside of the object is used in the
+ * freeptr_outside_object() function. If that is no
+ * longer true, the function needs to be modified.
*/
s->offset = size;
size += sizeof(void *);
+ } else if (freepointer_area > sizeof(void *)) {
+ /*
+ * Store freelist pointer near middle of object to keep
+ * it away from the edges of the object to avoid small
+ * sized over/underflows from neighboring allocations.
+ */
+ s->offset = ALIGN(freepointer_area / 2, sizeof(void *));
}
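A worked example of where the free pointer lands for a few hypothetical object sizes, using a userspace stand-in for the kernel's ALIGN(); without a ctor, RCU or poisoning it is placed near the middle of the usable area rather than at offset 0:

/* Userspace sketch; ALIGN_DEMO mirrors ALIGN() and the sizes are invented. */
#include <stdio.h>

#define ALIGN_DEMO(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

int main(void)
{
	unsigned int sizes[] = { 32, 96, 192, 512 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		unsigned long area = ALIGN_DEMO(sizes[i], sizeof(void *));
		unsigned long offset = ALIGN_DEMO(area / 2, sizeof(void *));

		printf("object %3u bytes -> free pointer at offset %2lu\n",
		       sizes[i], offset);
	}
	return 0;
}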
#ifdef CONFIG_SLUB_DEBUG
}
static void list_slab_objects(struct kmem_cache *s, struct page *page,
- const char *text)
+ const char *text, unsigned long *map)
{
#ifdef CONFIG_SLUB_DEBUG
void *addr = page_address(page);
void *p;
- unsigned long *map;
+
+ if (!map)
+ return;
slab_err(s, page, text, s->name);
slab_lock(page);
print_tracking(s, p);
}
}
- put_map(map);
-
slab_unlock(page);
#endif
}
{
LIST_HEAD(discard);
struct page *page, *h;
+ unsigned long *map = NULL;
+
+#ifdef CONFIG_SLUB_DEBUG
+ map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
+#endif
BUG_ON(irqs_disabled());
spin_lock_irq(&n->list_lock);
list_add(&page->slab_list, &discard);
} else {
list_slab_objects(s, page,
- "Objects remaining in %s on __kmem_cache_shutdown()");
+ "Objects remaining in %s on __kmem_cache_shutdown()",
+ map);
}
}
spin_unlock_irq(&n->list_lock);
+#ifdef CONFIG_SLUB_DEBUG
+ bitmap_free(map);
+#endif
+
list_for_each_entry_safe(page, h, &discard, slab_list)
discard_slab(s, page);
}
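The point of the reshuffle is that the bitmap is now allocated with GFP_KERNEL before the list_lock spinlock is taken and freed only after it is dropped, since sleeping allocations are not allowed under a spinlock. A minimal userspace sketch of that ordering, with a pthread mutex standing in for the spinlock and calloc() for bitmap_alloc():

/* Userspace sketch of the ordering only. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

int main(void)
{
	/* allocate the scratch bitmap up front, while sleeping is allowed */
	unsigned long *map = calloc(4, sizeof(*map));
	int used = 0;

	pthread_mutex_lock(&list_lock);
	/* under the lock: only use the map, never allocate; if the
	 * allocation failed, the debug listing is simply skipped */
	if (map) {
		map[0] |= 1UL;
		used = 1;
	}
	pthread_mutex_unlock(&list_lock);

	/* free only after the lock has been dropped */
	free(map);
	printf("map %s\n", used ? "used" : "unavailable");
	return 0;
}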
return ret;
}
+EXPORT_SYMBOL(__kmalloc_track_caller);
#ifdef CONFIG_NUMA
void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
return ret;
}
+EXPORT_SYMBOL(__kmalloc_node_track_caller);
#endif
#ifdef CONFIG_SYSFS
*/
if (buffer)
buf = buffer;
- else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
+ else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf) &&
+ !IS_ENABLED(CONFIG_SLUB_STATS))
buf = mbuf;
else {
buffer = (char *) get_zeroed_page(GFP_KERNEL);
.release = kmem_cache_release,
};
-static int uevent_filter(struct kset *kset, struct kobject *kobj)
-{
- struct kobj_type *ktype = get_ktype(kobj);
-
- if (ktype == &slab_ktype)
- return 1;
- return 0;
-}
-
-static const struct kset_uevent_ops slab_uevent_ops = {
- .filter = uevent_filter,
-};
-
static struct kset *slab_kset;
static inline struct kset *cache_kset(struct kmem_cache *s)
#ifdef CONFIG_MEMCG
kset_unregister(s->memcg_kset);
#endif
- kobject_uevent(&s->kobj, KOBJ_REMOVE);
out:
kobject_put(&s->kobj);
}
s->kobj.kset = kset;
err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
- if (err)
+ if (err) {
+ kobject_put(&s->kobj);
goto out;
+ }
err = sysfs_create_group(&s->kobj, &slab_attr_group);
if (err)
}
#endif
- kobject_uevent(&s->kobj, KOBJ_ADD);
if (!unmergeable) {
/* Setup first alias */
sysfs_slab_alias(s, s->name);
mutex_lock(&slab_mutex);
- slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
+ slab_kset = kset_create_and_add("slab", NULL, kernel_kobj);
if (!slab_kset) {
mutex_unlock(&slab_mutex);
pr_err("Cannot register slab subsystem.\n");