Merge branch 'slab/for-6.8/slub-hook-cleanups' into slab/for-next

[linux-2.6-microblaze.git] / mm / slub.c
diff --git a/mm/slub.c b/mm/slub.c

index 4fc203a..fac0738 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -34,6 +34,7 @@
  #include <linux/memory.h>
  #include <linux/math64.h>
  #include <linux/fault-inject.h>
+#include <linux/kmemleak.h>
  #include <linux/stacktrace.h>
  #include <linux/prefetch.h>
  #include <linux/memcontrol.h>
@@ -345,6 +346,60 @@ static void debugfs_slab_add(struct kmem_cache *);
  static inline void debugfs_slab_add(struct kmem_cache *s) { }
  #endif
  
+enum stat_item {        /* Indexes the per-cpu stat[] array of struct kmem_cache_cpu */
+       ALLOC_FASTPATH,         /* Allocation from cpu slab */
+       ALLOC_SLOWPATH,         /* Allocation by getting a new cpu slab */
+       FREE_FASTPATH,          /* Free to cpu slab */
+       FREE_SLOWPATH,          /* Freeing not to cpu slab */
+       FREE_FROZEN,            /* Freeing to frozen slab */
+       FREE_ADD_PARTIAL,       /* Freeing moves slab to partial list */
+       FREE_REMOVE_PARTIAL,    /* Freeing removes last object */
+       ALLOC_FROM_PARTIAL,     /* Cpu slab acquired from node partial list */
+       ALLOC_SLAB,             /* Cpu slab acquired from page allocator */
+       ALLOC_REFILL,           /* Refill cpu slab from slab freelist */
+       ALLOC_NODE_MISMATCH,    /* Switching cpu slab */
+       FREE_SLAB,              /* Slab freed to the page allocator */
+       CPUSLAB_FLUSH,          /* Abandoning of the cpu slab */
+       DEACTIVATE_FULL,        /* Cpu slab was full when deactivated */
+       DEACTIVATE_EMPTY,       /* Cpu slab was empty when deactivated */
+       DEACTIVATE_TO_HEAD,     /* Cpu slab was moved to the head of partials */
+       DEACTIVATE_TO_TAIL,     /* Cpu slab was moved to the tail of partials */
+       DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
+       DEACTIVATE_BYPASS,      /* Implicit deactivation */
+       ORDER_FALLBACK,         /* Number of times fallback was necessary */
+       CMPXCHG_DOUBLE_CPU_FAIL,/* Failures of this_cpu_cmpxchg_double */
+       CMPXCHG_DOUBLE_FAIL,    /* Failures of slab freelist update */
+       CPU_PARTIAL_ALLOC,      /* Used cpu partial on alloc */
+       CPU_PARTIAL_FREE,       /* Refill cpu partial on free */
+       CPU_PARTIAL_NODE,       /* Refill cpu partial from node partial */
+       CPU_PARTIAL_DRAIN,      /* Drain cpu partial to node partial */
+       NR_SLUB_STAT_ITEMS      /* Item count; must stay last, it sizes stat[] */
+};
+
+#ifndef CONFIG_SLUB_TINY
+/*
+ * When changing the layout, make sure freelist and tid are still compatible
+ * with this_cpu_cmpxchg_double() alignment requirements.
+ */
+struct kmem_cache_cpu {
+       union {                 /* freelist + tid, also viewable as one freelist_aba_t */
+               struct {
+                       void **freelist;        /* Pointer to next available object */
+                       unsigned long tid;      /* Globally unique transaction id */
+               };
+               freelist_aba_t freelist_tid;
+       };
+       struct slab *slab;      /* The slab from which we are allocating */
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+       struct slab *partial;   /* Partially allocated frozen slabs */
+#endif
+       local_lock_t lock;      /* Protects the fields above */
+#ifdef CONFIG_SLUB_STATS
+       unsigned int stat[NR_SLUB_STAT_ITEMS];  /* Event counters, indexed by enum stat_item */
+#endif
+};
+#endif /* CONFIG_SLUB_TINY */
+
  static inline void stat(const struct kmem_cache *s, enum stat_item si)
  {
  #ifdef CONFIG_SLUB_STATS
@@ -356,6 +411,41 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
  #endif
  }
  
+static inline
+void stat_add(const struct kmem_cache *s, enum stat_item si, int v)
+{      /* Add @v to the @si event counter of @s; empty without CONFIG_SLUB_STATS */
+#ifdef CONFIG_SLUB_STATS
+       raw_cpu_add(s->cpu_slab->stat[si], v);
+#endif
+}
+
+/*
+ * The slab lists for all objects.
+ */
+struct kmem_cache_node {
+       spinlock_t list_lock;           /* NOTE(review): presumably guards the lists below - confirm */
+       unsigned long nr_partial;       /* NOTE(review): presumably the number of slabs on @partial - confirm */
+       struct list_head partial;
+#ifdef CONFIG_SLUB_DEBUG
+       atomic_long_t nr_slabs;         /* This and the next two fields exist only with CONFIG_SLUB_DEBUG */
+       atomic_long_t total_objects;
+       struct list_head full;
+#endif
+};
+
+static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
+{      /* Look up the per-node structure of cache @s for node id @node */
+       return s->node[node];   /* Plain array read: @node is not range-checked here */
+}
+
+/*
+ * Iterator over all nodes. The body will be executed for each node that has
+ * a kmem_cache_node structure allocated (which is true for all online nodes)
+ *
+ * @__s:    cache whose nodes are walked
+ * @__node: integer lvalue used as the loop counter, runs 0..nr_node_ids-1
+ * @__n:    lvalue that receives get_node(__s, __node) on each iteration
+ *
+ * The expansion ends in an unbraced "if": node ids for which get_node()
+ * returns NULL are skipped, and an "else" written directly after the loop
+ * body would bind to that hidden "if".
+ */
+#define for_each_kmem_cache_node(__s, __node, __n) \
+       for (__node = 0; __node < nr_node_ids; __node++) \
+                if ((__n = get_node(__s, __node)))
+
  /*
   * Tracks for which NUMA nodes we have kmem_cache_nodes allocated.
   * Corresponds to node_state[N_NORMAL_MEMORY], but can temporarily
@@ -1774,12 +1864,214 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
  #endif
  #endif /* CONFIG_SLUB_DEBUG */
  
+static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
+{      /* Pick the node stat item for @s based on its SLAB_RECLAIM_ACCOUNT flag */
+       return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
+               NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
+}
+
+#ifdef CONFIG_MEMCG_KMEM
+static inline void memcg_free_slab_cgroups(struct slab *slab)
+{      /* Release the obj_cgroup pointer array attached to @slab */
+       kfree(slab_objcgs(slab));
+       slab->memcg_data = 0;   /* Clear the field so the freed array is no longer referenced */
+}
+
+static inline size_t obj_full_size(struct kmem_cache *s)
+{
+       /*
+        * For each accounted object there is an extra space which is used
+        * to store obj_cgroup membership. Charge it too.
+        *
+        * The result is therefore s->size plus the size of one
+        * struct obj_cgroup pointer.
+        */
+       return s->size + sizeof(struct obj_cgroup *);
+}
+
+/*
+ * Charge @objects objects of cache @s to the current obj_cgroup.
+ *
+ * If current_obj_cgroup() returns NULL, nothing is charged and *@objcgp is
+ * left untouched. When @lru is non-NULL, memcg_list_lru_alloc() is called
+ * for the objcg's memcg first, and a non-zero result fails the allocation.
+ * On a successful charge the objcg is stored in *@objcgp.
+ *
+ * Returns false if the allocation should fail.
+ */
+static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s,
+                                       struct list_lru *lru,
+                                       struct obj_cgroup **objcgp,
+                                       size_t objects, gfp_t flags)
+{
+       /*
+        * The obtained objcg pointer is safe to use within the current scope,
+        * defined by current task or set_active_memcg() pair.
+        * obj_cgroup_get() is used to get a permanent reference.
+        */
+       struct obj_cgroup *objcg = current_obj_cgroup();
+       if (!objcg)
+               return true;    /* Nothing to account against; *objcgp is not written */
+
+       if (lru) {
+               int ret;
+               struct mem_cgroup *memcg;
+
+               memcg = get_mem_cgroup_from_objcg(objcg);
+               ret = memcg_list_lru_alloc(memcg, lru, flags);
+               css_put(&memcg->css);   /* Drop the memcg reference before looking at ret */
+
+               if (ret)
+                       return false;
+       }
+
+       if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
+               return false;
+
+       *objcgp = objcg;
+       return true;
+}
+
+/*
+ * Inline gate in front of __memcg_slab_pre_alloc_hook(): the out-of-line
+ * helper is only called when memcg_kmem_online() is true and either @flags
+ * contains __GFP_ACCOUNT or @s has SLAB_ACCOUNT set. In every other case
+ * the allocation is allowed without touching *@objcgp.
+ *
+ * Returns false if the allocation should fail.
+ */
+static __fastpath_inline
+bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
+                              struct obj_cgroup **objcgp, size_t objects,
+                              gfp_t flags)
+{
+       if (!memcg_kmem_online())
+               return true;
+
+       if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)))
+               return true;
+
+       return likely(__memcg_slab_pre_alloc_hook(s, lru, objcgp, objects,
+                                                 flags));
+}
+
+static void __memcg_slab_post_alloc_hook(struct kmem_cache *s,
+                                        struct obj_cgroup *objcg,
+                                        gfp_t flags, size_t size,
+                                        void **p)
+{      /* Bind each of the @size objects in @p to @objcg, or give back its charge */
+       struct slab *slab;
+       unsigned long off;
+       size_t i;
+
+       flags &= gfp_allowed_mask;
+
+       for (i = 0; i < size; i++) {
+               if (likely(p[i])) {
+                       slab = virt_to_slab(p[i]);
+
+                       if (!slab_objcgs(slab) &&
+                           memcg_alloc_slab_cgroups(slab, s, flags, false)) {
+                               /* No objcg array could be set up: object stays unaccounted */
+                               obj_cgroup_uncharge(objcg, obj_full_size(s));
+                               continue;
+                       }
+
+                       off = obj_to_index(s, slab, p[i]);
+                       obj_cgroup_get(objcg);  /* Reference held by the array slot written below */
+                       slab_objcgs(slab)[off] = objcg;
+                       mod_objcg_state(objcg, slab_pgdat(slab),
+                                       cache_vmstat_idx(s), obj_full_size(s));
+               } else {
+                       /* NULL entry: return the charge for one object */
+                       obj_cgroup_uncharge(objcg, obj_full_size(s));
+               }
+       }
+}
+
+static __fastpath_inline
+void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
+                               gfp_t flags, size_t size, void **p)
+{      /* Inline gate: does nothing when memcg kmem is offline or @objcg is NULL */
+       if (likely(!memcg_kmem_online() || !objcg))
+               return;
+
+       return __memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
+}
+
+static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
+                                  void **p, int objects,
+                                  struct obj_cgroup **objcgs)
+{      /* Undo memcg accounting for the @objects objects in @p, all located on @slab */
+       for (int i = 0; i < objects; i++) {
+               struct obj_cgroup *objcg;
+               unsigned int off;
+
+               off = obj_to_index(s, slab, p[i]);
+               objcg = objcgs[off];
+               if (!objcg)
+                       continue;       /* Slot is empty: nothing recorded for this object */
+
+               objcgs[off] = NULL;
+               obj_cgroup_uncharge(objcg, obj_full_size(s));
+               mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
+                               -obj_full_size(s));
+               obj_cgroup_put(objcg);  /* Drop the reference that the cleared slot held */
+       }
+}
+
+static __fastpath_inline
+void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
+                         int objects)
+{      /* Inline gate in front of __memcg_slab_free_hook() */
+       struct obj_cgroup **objcgs;
+
+       if (!memcg_kmem_online())
+               return;
+
+       objcgs = slab_objcgs(slab);
+       if (likely(!objcgs))
+               return;         /* @slab has no objcg array, so nothing to undo */
+
+       __memcg_slab_free_hook(s, slab, p, objects, objcgs);
+}
+
+static inline
+void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
+                          struct obj_cgroup *objcg)
+{      /* Give back the charge for @objects objects of @s; no-op when @objcg is NULL */
+       if (objcg)
+               obj_cgroup_uncharge(objcg, objects * obj_full_size(s));
+}
+#else /* CONFIG_MEMCG_KMEM */
+static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
+{      /* !CONFIG_MEMCG_KMEM stub: there is never a memcg to report */
+       return NULL;
+}
+
+static inline void memcg_free_slab_cgroups(struct slab *slab)
+{      /* !CONFIG_MEMCG_KMEM stub: nothing to free */
+}
+
+static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
+                                            struct list_lru *lru,
+                                            struct obj_cgroup **objcgp,
+                                            size_t objects, gfp_t flags)
+{      /* !CONFIG_MEMCG_KMEM stub: always lets the allocation proceed, *objcgp untouched */
+       return true;
+}
+
+static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
+                                             struct obj_cgroup *objcg,
+                                             gfp_t flags, size_t size,
+                                             void **p)
+{      /* !CONFIG_MEMCG_KMEM stub: no accounting to record */
+}
+
+static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
+                                       void **p, int objects)
+{      /* !CONFIG_MEMCG_KMEM stub: no accounting to undo */
+}
+
+static inline
+void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
+                                struct obj_cgroup *objcg)
+{      /* !CONFIG_MEMCG_KMEM stub: no charge to return */
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
  /*
   * Hooks for other subsystems that check memory allocations. In a typical
   * production configuration these hooks all should produce no code at all.
+ *
+ * Returns true if freeing of the object can proceed, false if its reuse
+ * was delayed by KASAN quarantine, or it was returned to KFENCE.
   */
-static __always_inline bool slab_free_hook(struct kmem_cache *s,
-                                               void *x, bool init)
+static __always_inline
+bool slab_free_hook(struct kmem_cache *s, void *x, bool init)
  {
         kmemleak_free_recursive(x, s->flags);
         kmsan_slab_free(s, x);
@@ -1794,6 +2086,9 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
                 __kcsan_check_access(x, s->object_size,
                                      KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
  
+       if (kfence_free(x))
+               return false;
+
         /*
          * As memory initialization might be integrated into KASAN,
          * kasan_slab_free and initialization memset's must be
@@ -1802,7 +2097,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
          * The initialization memset's clear the object and the metadata,
          * but don't touch the SLAB redzone.
          */
-       if (init) {
+       if (unlikely(init)) {
                 int rsize;
  
                 if (!kasan_has_integrated_init())
@@ -1812,7 +2107,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
                        s->size - s->inuse - rsize);
         }
         /* KASAN might put x into memory quarantine, delaying its reuse. */
-       return kasan_slab_free(s, x, init);
+       return !kasan_slab_free(s, x, init);
  }
  
  static inline bool slab_free_freelist_hook(struct kmem_cache *s,
@@ -1822,23 +2117,26 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
  
         void *object;
         void *next = *head;
-       void *old_tail = *tail ? *tail : *head;
+       void *old_tail = *tail;
+       bool init;
  
         if (is_kfence_address(next)) {
                 slab_free_hook(s, next, false);
-               return true;
+               return false;
         }
  
         /* Head and tail of the reconstructed freelist */
         *head = NULL;
         *tail = NULL;
  
+       init = slab_want_init_on_free(s);
+
         do {
                 object = next;
                 next = get_freepointer(s, object);
  
                 /* If object's reuse doesn't have to be delayed */
-               if (!slab_free_hook(s, object, slab_want_init_on_free(s))) {
+               if (likely(slab_free_hook(s, object, init))) {
                         /* Move object to the new freelist */
                         set_freepointer(s, object, *head);
                         *head = object;
@@ -1853,9 +2151,6 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
                 }
         } while (object != old_tail);
  
-       if (*head == *tail)
-               *tail = NULL;
-
         return *head != NULL;
  }
  
@@ -2008,6 +2303,26 @@ static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
  }
  #endif /* CONFIG_SLAB_FREELIST_RANDOM */
  
+static __always_inline void account_slab(struct slab *slab, int order,
+                                        struct kmem_cache *s, gfp_t gfp)
+{      /* Account a slab of 2^@order pages for cache @s */
+       if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
+               memcg_alloc_slab_cgroups(slab, s, gfp, true);   /* Return value is not checked here */
+
+       mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
+                           PAGE_SIZE << order);
+}
+
+static __always_inline void unaccount_slab(struct slab *slab, int order,
+                                          struct kmem_cache *s)
+{      /* Reverse of account_slab(): free the objcg array and subtract the slab's bytes */
+       if (memcg_kmem_online())
+               memcg_free_slab_cgroups(slab);  /* Not conditional on SLAB_ACCOUNT, unlike account_slab() */
+
+       mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
+                           -(PAGE_SIZE << order));
+}
+
  static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
  {
         struct slab *slab;
@@ -3420,6 +3735,86 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
                         0, sizeof(void *));
  }
  
+noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
+{      /* Returns -ENOMEM when __should_failslab() asks for a failure, otherwise 0 */
+       if (__should_failslab(s, gfpflags))
+               return -ENOMEM;
+       return 0;
+}
+ALLOW_ERROR_INJECTION(should_failslab, ERRNO); /* Registers the function for error injection, errno-style return */
+
+static __fastpath_inline
+struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
+                                      struct list_lru *lru,
+                                      struct obj_cgroup **objcgp,
+                                      size_t size, gfp_t flags)
+{      /* Pre-allocation checks for @size objects; returns @s to proceed, NULL to fail */
+       flags &= gfp_allowed_mask;
+
+       might_alloc(flags);
+
+       if (unlikely(should_failslab(s, flags)))
+               return NULL;    /* Failure requested through should_failslab() */
+
+       if (unlikely(!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags)))
+               return NULL;    /* The memcg hook refused the allocation */
+
+       return s;
+}
+
+static __fastpath_inline
+void slab_post_alloc_hook(struct kmem_cache *s,        struct obj_cgroup *objcg,
+                         gfp_t flags, size_t size, void **p, bool init,
+                         unsigned int orig_size)
+{      /* Per object in @p: KASAN, optional zeroing, kmemleak, KMSAN; then the memcg hook once */
+       unsigned int zero_size = s->object_size;
+       bool kasan_init = init;
+       size_t i;
+       gfp_t init_flags = flags & gfp_allowed_mask;
+
+       /*
+        * For kmalloc object, the allocated memory size(object_size) is likely
+        * larger than the requested size(orig_size). If redzone check is
+        * enabled for the extra space, don't zero it, as it will be redzoned
+        * soon. The redzone operation for this extra space could be seen as a
+        * replacement of current poisoning under certain debug option, and
+        * won't break other sanity checks.
+        */
+       if (kmem_cache_debug_flags(s, SLAB_STORE_USER | SLAB_RED_ZONE) &&
+           (s->flags & SLAB_KMALLOC))
+               zero_size = orig_size;
+
+       /*
+        * When slub_debug is enabled, avoid memory initialization integrated
+        * into KASAN and instead zero out the memory via the memset below with
+        * the proper size. Otherwise, KASAN might overwrite SLUB redzones and
+        * cause false-positive reports. This does not lead to a performance
+        * penalty on production builds, as slub_debug is not intended to be
+        * enabled there.
+        */
+       if (__slub_debug_enabled())
+               kasan_init = false;
+
+       /*
+        * As memory initialization might be integrated into KASAN,
+        * kasan_slab_alloc and initialization memset must be
+        * kept together to avoid discrepancies in behavior.
+        *
+        * As p[i] might get tagged, memset and kmemleak hook come after KASAN.
+        */
+       for (i = 0; i < size; i++) {
+               p[i] = kasan_slab_alloc(s, p[i], init_flags, kasan_init);
+               if (p[i] && init && (!kasan_init ||
+                                    !kasan_has_integrated_init()))
+                       memset(p[i], 0, zero_size);
+               kmemleak_alloc_recursive(p[i], s->object_size, 1,
+                                        s->flags, init_flags);
+               kmsan_slab_alloc(s, p[i], init_flags);
+       }
+
+       memcg_slab_post_alloc_hook(s, objcg, flags, size, p);   /* Gets the unmasked @flags, not init_flags */
+}
+
  /*
   * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
   * have the fastpath folded into their functions. So no function call
@@ -3438,7 +3833,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
         bool init = false;
  
         s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
-       if (!s)
+       if (unlikely(!s))
                 return NULL;
  
         object = kfence_alloc(s, orig_size, gfpflags);
@@ -3460,53 +3855,169 @@ out:
         return object;
  }
  
-static __fastpath_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru,
-               gfp_t gfpflags, unsigned long addr, size_t orig_size)
+void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
  {
-       return slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, addr, orig_size);
+       void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE, _RET_IP_,
+                                   s->object_size);
+
+       trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, NUMA_NO_NODE);
+
+       return ret;
  }
+EXPORT_SYMBOL(kmem_cache_alloc);
  
-static __fastpath_inline
-void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
-                            gfp_t gfpflags)
+void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
+                          gfp_t gfpflags)
  {
-       void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
+       void *ret = slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, _RET_IP_,
+                                   s->object_size);
  
         trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, NUMA_NO_NODE);
  
         return ret;
  }
+EXPORT_SYMBOL(kmem_cache_alloc_lru);
  
-void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
+/**
+ * kmem_cache_alloc_node - Allocate an object on the specified node
+ * @s: The cache to allocate from.
+ * @gfpflags: See kmalloc().
+ * @node: node number of the target node.
+ *
+ * Identical to kmem_cache_alloc but it will allocate memory on the given
+ * node, which can improve the performance for cpu bound structures.
+ *
+ * Fallback to other node is possible if __GFP_THISNODE is not set.
+ *
+ * Return: pointer to the new object or %NULL in case of error
+ */
+void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
  {
-       return __kmem_cache_alloc_lru(s, NULL, gfpflags);
+       void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
+
+       trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, node);
+
+       return ret;
  }
-EXPORT_SYMBOL(kmem_cache_alloc);
+EXPORT_SYMBOL(kmem_cache_alloc_node);
  
-void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
-                          gfp_t gfpflags)
+/*
+ * To avoid unnecessary overhead, we pass through large allocation requests
+ * directly to the page allocator. We use __GFP_COMP, because we will need to
+ * know the allocation order to free the pages properly in kfree.
+ */
+static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
  {
-       return __kmem_cache_alloc_lru(s, lru, gfpflags);
+       struct page *page;
+       void *ptr = NULL;
+       unsigned int order = get_order(size);
+
+       if (unlikely(flags & GFP_SLAB_BUG_MASK))
+               flags = kmalloc_fix_flags(flags);
+
+       flags |= __GFP_COMP;
+       page = alloc_pages_node(node, flags, order);
+       if (page) {
+               ptr = page_address(page);
+               mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
+                                     PAGE_SIZE << order);
+       }
+
+       ptr = kasan_kmalloc_large(ptr, size, flags);
+       /* As ptr might get tagged, call kmemleak hook after KASAN. */
+       kmemleak_alloc(ptr, size, 1, flags);
+       kmsan_kmalloc_large(ptr, size, flags);
+
+       return ptr;
  }
-EXPORT_SYMBOL(kmem_cache_alloc_lru);
  
-void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags,
-                             int node, size_t orig_size,
-                             unsigned long caller)
+void *kmalloc_large(size_t size, gfp_t flags)
+{      /* Large allocation with no node preference (NUMA_NO_NODE), plus a kmalloc trace event */
+       void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
+
+       /* The traced allocated size is the page-order rounding of @size */
+       trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
+                     flags, NUMA_NO_NODE);
+       return ret;
+}
+EXPORT_SYMBOL(kmalloc_large);
+
+void *kmalloc_large_node(size_t size, gfp_t flags, int node)
  {
-       return slab_alloc_node(s, NULL, gfpflags, node,
-                              caller, orig_size);
+       void *ret = __kmalloc_large_node(size, flags, node);
+
+       trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
+                     flags, node);
+       return ret;
  }
+EXPORT_SYMBOL(kmalloc_large_node);
  
-void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
+static __always_inline
+void *__do_kmalloc_node(size_t size, gfp_t flags, int node,
+                       unsigned long caller)
  {
-       void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
+       struct kmem_cache *s;
+       void *ret;
  
-       trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, node);
+       if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
+               ret = __kmalloc_large_node(size, flags, node);
+               trace_kmalloc(caller, ret, size,
+                             PAGE_SIZE << get_order(size), flags, node);
+               return ret;
+       }
+
+       if (unlikely(!size))
+               return ZERO_SIZE_PTR;
+
+       s = kmalloc_slab(size, flags, caller);
  
+       ret = slab_alloc_node(s, NULL, flags, node, caller, size);
+       ret = kasan_kmalloc(s, ret, size, flags);
+       trace_kmalloc(caller, ret, size, s->size, flags, node);
         return ret;
  }
-EXPORT_SYMBOL(kmem_cache_alloc_node);
+
+void *__kmalloc_node(size_t size, gfp_t flags, int node)
+{      /* Exported wrapper: passes _RET_IP_ as the caller address to __do_kmalloc_node() */
+       return __do_kmalloc_node(size, flags, node, _RET_IP_);
+}
+EXPORT_SYMBOL(__kmalloc_node);
+
+void *__kmalloc(size_t size, gfp_t flags)
+{      /* Exported wrapper: like __kmalloc_node() but with NUMA_NO_NODE as the node */
+       return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_);
+}
+EXPORT_SYMBOL(__kmalloc);
+
+void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
+                                 int node, unsigned long caller)
+{      /* Exported wrapper: forwards the supplied @caller address instead of _RET_IP_ */
+       return __do_kmalloc_node(size, flags, node, caller);
+}
+EXPORT_SYMBOL(__kmalloc_node_track_caller);
+
+void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
+{      /* Allocate @size bytes from the given cache @s with no node preference */
+       void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE,
+                                           _RET_IP_, size);
+
+       /* Traced before kasan_kmalloc(), so the event carries the pointer as allocated */
+       trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, NUMA_NO_NODE);
+
+       ret = kasan_kmalloc(s, ret, size, gfpflags);
+       return ret;
+}
+EXPORT_SYMBOL(kmalloc_trace);
+
+void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
+                        int node, size_t size)
+{      /* Same as kmalloc_trace() but the allocation is requested for @node */
+       void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
+
+       /* Traced before kasan_kmalloc(), so the event carries the pointer as allocated */
+       trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, node);
+
+       ret = kasan_kmalloc(s, ret, size, gfpflags);
+       return ret;
+}
+EXPORT_SYMBOL(kmalloc_node_trace);
  
  static noinline void free_to_partial_list(
         struct kmem_cache *s, struct slab *slab,
@@ -3592,9 +4103,6 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
  
         stat(s, FREE_SLOWPATH);
  
-       if (kfence_free(head))
-               return;
-
         if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
                 free_to_partial_list(s, slab, head, tail, cnt, addr);
                 return;
@@ -3716,7 +4224,6 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
                                 struct slab *slab, void *head, void *tail,
                                 int cnt, unsigned long addr)
  {
-       void *tail_obj = tail ? : head;
         struct kmem_cache_cpu *c;
         unsigned long tid;
         void **freelist;
@@ -3735,14 +4242,14 @@ redo:
         barrier();
  
         if (unlikely(slab != c->slab)) {
-               __slab_free(s, slab, head, tail_obj, cnt, addr);
+               __slab_free(s, slab, head, tail, cnt, addr);
                 return;
         }
  
         if (USE_LOCKLESS_FAST_PATH()) {
                 freelist = READ_ONCE(c->freelist);
  
-               set_freepointer(s, tail_obj, freelist);
+               set_freepointer(s, tail, freelist);
  
                 if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) {
                         note_cmpxchg_failure("slab_free", s, tid);
@@ -3759,60 +4266,143 @@ redo:
                 tid = c->tid;
                 freelist = c->freelist;
  
-               set_freepointer(s, tail_obj, freelist);
+               set_freepointer(s, tail, freelist);
                 c->freelist = head;
                 c->tid = next_tid(tid);
  
                 local_unlock(&s->cpu_slab->lock);
         }
-       stat(s, FREE_FASTPATH);
+       stat_add(s, FREE_FASTPATH, cnt);
  }
  #else /* CONFIG_SLUB_TINY */
  static void do_slab_free(struct kmem_cache *s,
                                 struct slab *slab, void *head, void *tail,
                                 int cnt, unsigned long addr)
  {
-       void *tail_obj = tail ? : head;
-
-       __slab_free(s, slab, head, tail_obj, cnt, addr);
+       __slab_free(s, slab, head, tail, cnt, addr);
  }
  #endif /* CONFIG_SLUB_TINY */
  
-static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab,
-                                     void *head, void *tail, void **p, int cnt,
-                                     unsigned long addr)
+static __fastpath_inline
+void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
+              unsigned long addr)
+{      /* Free the single @object: memcg hook first, then the other free hooks */
+       memcg_slab_free_hook(s, slab, &object, 1);
+
+       /* Only when slab_free_hook() returns true: freelist of one, head == tail == object */
+       if (likely(slab_free_hook(s, object, slab_want_init_on_free(s))))
+               do_slab_free(s, slab, object, object, 1, addr);
+}
+
+static __fastpath_inline
+void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head,
+                   void *tail, void **p, int cnt, unsigned long addr)
  {
         memcg_slab_free_hook(s, slab, p, cnt);
         /*
          * With KASAN enabled slab_free_freelist_hook modifies the freelist
          * to remove objects, whose reuse must be delayed.
          */
-       if (slab_free_freelist_hook(s, &head, &tail, &cnt))
+       if (likely(slab_free_freelist_hook(s, &head, &tail, &cnt)))
                 do_slab_free(s, slab, head, tail, cnt, addr);
  }
  
  #ifdef CONFIG_KASAN_GENERIC
  void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
  {
-       do_slab_free(cache, virt_to_slab(x), x, NULL, 1, addr);
+       do_slab_free(cache, virt_to_slab(x), x, x, 1, addr);
  }
  #endif
  
-void __kmem_cache_free(struct kmem_cache *s, void *x, unsigned long caller)
+static inline struct kmem_cache *virt_to_cache(const void *obj)
+{      /* Returns the cache recorded in @obj's slab, or NULL (with a one-time warning) */
+       struct slab *slab;
+
+       slab = virt_to_slab(obj);
+       if (WARN_ONCE(!slab, "%s: Object is not a Slab page!\n", __func__))
+               return NULL;
+       return slab->slab_cache;
+}
+
+static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
  {
-       slab_free(s, virt_to_slab(x), x, NULL, &x, 1, caller);
+       struct kmem_cache *cachep;
+
+       if (!IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) &&
+           !kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS))
+               return s;
+
+       cachep = virt_to_cache(x);
+       if (WARN(cachep && cachep != s,
+                "%s: Wrong slab cache. %s but object is from %s\n",
+                __func__, s->name, cachep->name))
+               print_tracking(cachep, x);
+       return cachep;
  }
  
+/**
+ * kmem_cache_free - Deallocate an object
+ * @s: The cache the allocation was from.
+ * @x: The previously allocated object.
+ *
+ * Free an object which was previously allocated from this
+ * cache.
+ */
  void kmem_cache_free(struct kmem_cache *s, void *x)
  {
         s = cache_from_obj(s, x);
         if (!s)
                 return;
         trace_kmem_cache_free(_RET_IP_, x, s);
-       slab_free(s, virt_to_slab(x), x, NULL, &x, 1, _RET_IP_);
+       slab_free(s, virt_to_slab(x), x, _RET_IP_);
  }
  EXPORT_SYMBOL(kmem_cache_free);
  
+static void free_large_kmalloc(struct folio *folio, void *object)
+{      /* Free @object whose memory is the non-slab @folio: hooks, stat update, page free */
+       unsigned int order = folio_order(folio);
+
+       if (WARN_ON_ONCE(order == 0))   /* Warns only; the free below still runs */
+               pr_warn_once("object pointer: 0x%p\n", object);
+
+       kmemleak_free(object);
+       kasan_kfree_large(object);
+       kmsan_kfree_large(object);
+
+       mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
+                             -(PAGE_SIZE << order));
+       __free_pages(folio_page(folio, 0), order);
+}
+
+/**
+ * kfree - free previously allocated memory
+ * @object: pointer returned by kmalloc() or kmem_cache_alloc()
+ *
+ * If @object is NULL, no operation is performed.
+ *
+ * The same early return applies to every pointer for which
+ * ZERO_OR_NULL_PTR() is true. The kfree trace event is emitted before that
+ * check. Memory whose folio is not a slab folio is handed to
+ * free_large_kmalloc(); everything else is freed to the cache recorded in
+ * its slab.
+ */
+void kfree(const void *object)
+{
+       struct folio *folio;
+       struct slab *slab;
+       struct kmem_cache *s;
+       void *x = (void *)object;       /* Non-const alias for slab_free() */
+
+       trace_kfree(_RET_IP_, object);
+
+       if (unlikely(ZERO_OR_NULL_PTR(object)))
+               return;
+
+       folio = virt_to_folio(object);
+       if (unlikely(!folio_test_slab(folio))) {
+               free_large_kmalloc(folio, (void *)object);
+               return;
+       }
+
+       slab = folio_slab(folio);
+       s = slab->slab_cache;
+       slab_free(s, slab, x, _RET_IP_);
+}
+EXPORT_SYMBOL(kfree);
+
  struct detached_freelist {
         struct slab *slab;
         void *tail;
@@ -3892,6 +4482,27 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
         return same;
  }
  
+/*
+ * Internal bulk free of objects that were not initialised by the post alloc
+ * hooks and thus skip the free hooks: do_slab_free() is called directly.
+ */
+static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
+{
+       if (!size)
+               return;
+
+       do {
+               struct detached_freelist df;
+
+               size = build_detached_freelist(s, size, p, &df);
+               if (!df.slab)
+                       continue;       /* jumps to the while (size) test */
+
+               do_slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt,
+                            _RET_IP_);
+       } while (likely(size)); /* until build_detached_freelist() returns 0 */
+}
+
  /* Note that interrupts must be enabled when calling this function. */
  void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
  {
@@ -3905,15 +4516,16 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
                 if (!df.slab)
                         continue;
  
-               slab_free(df.s, df.slab, df.freelist, df.tail, &p[size], df.cnt,
-                         _RET_IP_);
+               slab_free_bulk(df.s, df.slab, df.freelist, df.tail, &p[size],
+                              df.cnt, _RET_IP_);
         } while (likely(size));
  }
  EXPORT_SYMBOL(kmem_cache_free_bulk);
  
  #ifndef CONFIG_SLUB_TINY
-static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
-                       size_t size, void **p, struct obj_cgroup *objcg)
+static inline
+int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+                           void **p)
  {
         struct kmem_cache_cpu *c;
         unsigned long irqflags;
@@ -3967,6 +4579,7 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
                 c->freelist = get_freepointer(s, object);
                 p[i] = object;
                 maybe_wipe_obj_freeptr(s, p[i]);
+               stat(s, ALLOC_FASTPATH);
         }
         c->tid = next_tid(c->tid);
         local_unlock_irqrestore(&s->cpu_slab->lock, irqflags);
@@ -3976,14 +4589,13 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
  
  error:
         slub_put_cpu_ptr(s->cpu_slab);
-       slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
-       kmem_cache_free_bulk(s, i, p);
+       __kmem_cache_free_bulk(s, i, p);
         return 0;
  
  }
  #else /* CONFIG_SLUB_TINY */
  static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
-                       size_t size, void **p, struct obj_cgroup *objcg)
+                                  size_t size, void **p)
  {
         int i;
  
@@ -4006,8 +4618,7 @@ static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
         return i;
  
  error:
-       slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
-       kmem_cache_free_bulk(s, i, p);
+       __kmem_cache_free_bulk(s, i, p);
         return 0;
  }
  #endif /* CONFIG_SLUB_TINY */
@@ -4027,15 +4638,19 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
         if (unlikely(!s))
                 return 0;
  
-       i = __kmem_cache_alloc_bulk(s, flags, size, p, objcg);
+       i = __kmem_cache_alloc_bulk(s, flags, size, p);
  
         /*
          * memcg and kmem_cache debug support and memory initialization.
          * Done outside of the IRQ disabled fastpath loop.
          */
-       if (i != 0)
+       if (likely(i != 0)) {
                 slab_post_alloc_hook(s, objcg, flags, size, p,
                         slab_want_init_on_alloc(flags, s), s->object_size);
+       } else {
+               memcg_slab_alloc_error_hook(s, size, objcg);
+       }
+
         return i;
  }
  EXPORT_SYMBOL(kmem_cache_alloc_bulk);