generic radix trees
author Kent Overstreet <kent.overstreet@gmail.com>
Tue, 12 Mar 2019 06:31:14 +0000 (23:31 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 12 Mar 2019 17:04:02 +0000 (10:04 -0700)
Very simple radix tree implementation that supports storing arbitrary
size entries, up to PAGE_SIZE - upcoming patches will convert existing
flex_array users to genradixes.  The new genradix code has a much
simpler API and implementation, and doesn't have a hard limit on the
number of elements like flex_array does.

Link: http://lkml.kernel.org/r/20181217131929.11727-5-kent.overstreet@gmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Eric Paris <eparis@parisplace.org>
Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Documentation/core-api/generic-radix-tree.rst [new file with mode: 0644]
Documentation/core-api/index.rst
include/linux/generic-radix-tree.h [new file with mode: 0644]
lib/Makefile
lib/generic-radix-tree.c [new file with mode: 0644]

diff --git a/Documentation/core-api/generic-radix-tree.rst b/Documentation/core-api/generic-radix-tree.rst
new file mode 100644 (file)
index 0000000..ed42839
--- /dev/null
@@ -0,0 +1,12 @@
+=================================
+Generic radix trees/sparse arrays
+=================================
+
+.. kernel-doc:: include/linux/generic-radix-tree.h
+   :doc: Generic radix trees/sparse arrays
+
+generic radix tree functions
+----------------------------
+
+.. kernel-doc:: include/linux/generic-radix-tree.h
+   :functions:
index 3adee82..6870baf 100644 (file)
@@ -28,6 +28,7 @@ Core utilities
    errseq
    printk-formats
    circular-buffers
+   generic-radix-tree
    memory-allocation
    mm-api
    gfp_mask-from-fs-io
diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h
new file mode 100644 (file)
index 0000000..3a91130
--- /dev/null
@@ -0,0 +1,231 @@
+#ifndef _LINUX_GENERIC_RADIX_TREE_H
+#define _LINUX_GENERIC_RADIX_TREE_H
+
+/**
+ * DOC: Generic radix trees/sparse arrays
+ *
+ * Very simple and minimalistic, supporting arbitrary size entries up to
+ * PAGE_SIZE.
+ *
+ * A genradix is defined with the type it will store, like so:
+ *
+ * static GENRADIX(struct foo) foo_genradix;
+ *
+ * The main operations are:
+ *
+ * - genradix_init(radix) - initialize an empty genradix
+ *
+ * - genradix_free(radix) - free all memory owned by the genradix and
+ *   reinitialize it
+ *
+ * - genradix_ptr(radix, idx) - gets a pointer to the entry at idx, returning
+ *   NULL if that entry does not exist
+ *
+ * - genradix_ptr_alloc(radix, idx, gfp) - gets a pointer to an entry,
+ *   allocating it if necessary
+ *
+ * - genradix_for_each(radix, iter, p) - iterate over each entry in a genradix
+ *
+ * The radix tree allocates one page of entries at a time, so entries may exist
+ * that were never explicitly allocated - they will be initialized to all
+ * zeroes.
+ *
+ * Internally, a genradix is just a radix tree of pages, and indexing works in
+ * terms of byte offsets. The wrappers in this header file use sizeof on the
+ * type the radix contains to calculate a byte offset from the index - see
+ * __idx_to_offset.
+ */
+
+#include <asm/page.h>
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+
+struct genradix_root;
+
+/*
+ * Type-independent part of a genradix: only the (RCU annotated) root pointer.
+ * GENRADIX() wraps this together with the type being stored.
+ */
+struct __genradix {
+       struct genradix_root __rcu      *root;
+};
+
+/*
+ * NOTE: currently, sizeof(_type) must not be larger than PAGE_SIZE:
+ */
+
+/* Initializer for a GENRADIX() object: an empty tree, i.e. a NULL root. */
+#define __GENRADIX_INITIALIZER                                 \
+       {                                                       \
+               .tree = {                                       \
+                       .root = NULL,                           \
+               }                                               \
+       }
+
+/*
+ * We use a 0 size array to stash the type we're storing without taking any
+ * space at runtime - then the various accessor macros can use typeof() to get
+ * to it for casts/sizeof - we also force the alignment so that storing a type
+ * with a ridiculous alignment doesn't blow up the alignment or size of the
+ * genradix.
+ */
+
+#define GENRADIX(_type)                                                \
+struct {                                                       \
+       struct __genradix       tree;                           \
+       _type                   type[0] __aligned(1);           \
+}
+
+/* Define a genradix named @_name storing @_type, initialized to empty. */
+#define DEFINE_GENRADIX(_name, _type)                          \
+       GENRADIX(_type) _name = __GENRADIX_INITIALIZER
+
+/**
+ * genradix_init - initialize a genradix
+ * @_radix:    genradix to initialize
+ *
+ * Assigns __GENRADIX_INITIALIZER to @_radix, leaving it empty. Any root
+ * pointer previously stored in @_radix is overwritten, not freed.
+ *
+ * Does not fail
+ */
+#define genradix_init(_radix)                                  \
+do {                                                           \
+       *(_radix) = (typeof(*(_radix))) __GENRADIX_INITIALIZER; \
+} while (0)
+
+void __genradix_free(struct __genradix *);
+
+/**
+ * genradix_free - free all memory owned by a genradix
+ * @_radix: the genradix to free
+ *
+ * After freeing, @_radix will be reinitialized and empty
+ */
+#define genradix_free(_radix)  __genradix_free(&(_radix)->tree)
+
+/*
+ * Convert an entry index into a byte offset into the tree's data, given the
+ * size of a single entry.
+ *
+ * When @obj_size is not a power of two, each page holds PAGE_SIZE / obj_size
+ * entries and the remainder of the page is skipped, so an entry never
+ * straddles a page boundary.
+ */
+static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
+{
+       size_t per_page, page_nr, slot;
+
+       /* Reject oversized entries - at build time if the size is constant: */
+       if (!__builtin_constant_p(obj_size))
+               BUG_ON(obj_size > PAGE_SIZE);
+       else
+               BUILD_BUG_ON(obj_size > PAGE_SIZE);
+
+       /* Power of two sizes are indexed linearly: */
+       if (is_power_of_2(obj_size))
+               return idx * obj_size;
+
+       per_page = PAGE_SIZE / obj_size;
+       page_nr  = idx / per_page;
+       slot     = idx % per_page;
+
+       return page_nr * PAGE_SIZE + slot * obj_size;
+}
+
+/*
+ * Helpers built on the zero size 'type' member of GENRADIX(): a cast to a
+ * pointer to the stored type, the size of one entry, and the byte offset of
+ * the entry at @_idx.
+ */
+#define __genradix_cast(_radix)                (typeof((_radix)->type[0]) *)
+#define __genradix_obj_size(_radix)    sizeof((_radix)->type[0])
+#define __genradix_idx_to_offset(_radix, _idx)                 \
+       __idx_to_offset(_idx, __genradix_obj_size(_radix))
+
+void *__genradix_ptr(struct __genradix *, size_t);
+
+/**
+ * genradix_ptr - get a pointer to a genradix entry
+ * @_radix:    genradix to access
+ * @_idx:      index to fetch
+ *
+ * Returns a pointer to entry at @_idx, or NULL if that entry does not exist.
+ */
+#define genradix_ptr(_radix, _idx)                             \
+       (__genradix_cast(_radix)                                \
+        __genradix_ptr(&(_radix)->tree,                        \
+                       __genradix_idx_to_offset(_radix, _idx)))
+
+void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t);
+
+/**
+ * genradix_ptr_alloc - get a pointer to a genradix entry, allocating it
+ *                     if necessary
+ * @_radix:    genradix to access
+ * @_idx:      index to fetch
+ * @_gfp:      gfp mask
+ *
+ * Returns a pointer to entry at @_idx, or NULL on allocation failure
+ */
+#define genradix_ptr_alloc(_radix, _idx, _gfp)                 \
+       (__genradix_cast(_radix)                                \
+        __genradix_ptr_alloc(&(_radix)->tree,                  \
+                       __genradix_idx_to_offset(_radix, _idx), \
+                       _gfp))
+
+/*
+ * Iteration cursor: @offset is the byte offset into the tree's data, @pos is
+ * the index of the entry at that offset - see genradix_iter_init().
+ */
+struct genradix_iter {
+       size_t                  offset;
+       size_t                  pos;
+};
+
+/**
+ * genradix_iter_init - initialize a genradix_iter
+ * @_radix:    genradix that will be iterated over
+ * @_idx:      index to start iterating from
+ */
+#define genradix_iter_init(_radix, _idx)                       \
+       ((struct genradix_iter) {                               \
+               .pos    = (_idx),                               \
+               .offset = __genradix_idx_to_offset((_radix), (_idx)),\
+       })
+
+void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t);
+
+/**
+ * genradix_iter_peek - get first entry at or above iterator's current
+ *                     position
+ * @_iter:     a genradix_iter
+ * @_radix:    genradix being iterated over
+ *
+ * If no more entries exist at or above @_iter's current position, returns NULL
+ */
+#define genradix_iter_peek(_iter, _radix)                      \
+       (__genradix_cast(_radix)                                \
+        __genradix_iter_peek(_iter, &(_radix)->tree,           \
+                             PAGE_SIZE / __genradix_obj_size(_radix)))
+
+/*
+ * Step @iter forward by one entry of @obj_size bytes.
+ *
+ * For sizes that are not a power of two, if another whole entry no longer
+ * fits in the current page the offset is moved to the start of the next page.
+ */
+static inline void __genradix_iter_advance(struct genradix_iter *iter,
+                                          size_t obj_size)
+{
+       size_t next = iter->offset + obj_size;
+
+       if (!is_power_of_2(obj_size)) {
+               size_t in_page = next & (PAGE_SIZE - 1);
+
+               if (in_page + obj_size > PAGE_SIZE)
+                       next = round_up(next, PAGE_SIZE);
+       }
+
+       iter->offset = next;
+       iter->pos++;
+}
+
+/**
+ * genradix_iter_advance - move an iterator to the next entry
+ * @_iter:     pointer to a genradix_iter
+ * @_radix:    genradix being iterated over
+ */
+#define genradix_iter_advance(_iter, _radix)                   \
+       __genradix_iter_advance(_iter, __genradix_obj_size(_radix))
+
+/**
+ * genradix_for_each_from - iterate over each entry in a genradix, starting
+ *                     from a given index
+ * @_radix:    genradix to iterate over
+ * @_iter:     a genradix_iter to track current position
+ * @_p:                pointer to genradix entry type
+ * @_start:    index to start iterating from
+ *
+ * The loop ends when genradix_iter_peek() returns NULL.
+ */
+#define genradix_for_each_from(_radix, _iter, _p, _start)      \
+       for (_iter = genradix_iter_init(_radix, _start);        \
+            (_p = genradix_iter_peek(&_iter, _radix)) != NULL; \
+            genradix_iter_advance(&_iter, _radix))
+
+/**
+ * genradix_for_each - iterate over each entry in a genradix
+ * @_radix:    genradix to iterate over
+ * @_iter:     a genradix_iter to track current position
+ * @_p:                pointer to genradix entry type
+ *
+ * On every iteration, @_p will point to the current entry, and @_iter.pos
+ * will be the current entry's index.
+ */
+#define genradix_for_each(_radix, _iter, _p)                   \
+       genradix_for_each_from(_radix, _iter, _p, 0)
+
+int __genradix_prealloc(struct __genradix *, size_t, gfp_t);
+
+/**
+ * genradix_prealloc - preallocate entries in a generic radix tree
+ * @_radix:    genradix to preallocate
+ * @_nr:       number of entries to preallocate
+ * @_gfp:      gfp mask
+ *
+ * The size passed down to __genradix_prealloc() is the byte offset of the
+ * entry at index @_nr + 1.
+ *
+ * Returns 0 on success, -ENOMEM on failure
+ */
+#define genradix_prealloc(_radix, _nr, _gfp)                   \
+        __genradix_prealloc(&(_radix)->tree,                   \
+                       __genradix_idx_to_offset(_radix, (_nr) + 1),\
+                       _gfp)
+
+
+#endif /* _LINUX_GENERIC_RADIX_TREE_H */
index 6475179..b798b41 100644 (file)
@@ -38,7 +38,8 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \
         gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
         bsearch.o find_bit.o llist.o memweight.o kfifo.o \
         percpu-refcount.o rhashtable.o reciprocal_div.o \
-        once.o refcount.o usercopy.o errseq.o bucket_locks.o
+        once.o refcount.o usercopy.o errseq.o bucket_locks.o \
+        generic-radix-tree.o
 obj-$(CONFIG_STRING_SELFTEST) += test_string.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c
new file mode 100644 (file)
index 0000000..a7bafc4
--- /dev/null
@@ -0,0 +1,217 @@
+
+#include <linux/export.h>
+#include <linux/generic-radix-tree.h>
+#include <linux/gfp.h>
+
+#define GENRADIX_ARY           (PAGE_SIZE / sizeof(struct genradix_node *))
+#define GENRADIX_ARY_SHIFT     ilog2(GENRADIX_ARY)
+
+/*
+ * A node is PAGE_SIZE bytes, viewed either as an array of child pointers
+ * (interior node) or as raw entry data (leaf). The node itself does not
+ * record which it is; the tree walkers know from the remaining depth.
+ */
+struct genradix_node {
+       union {
+               /* Interior node: */
+               struct genradix_node    *children[GENRADIX_ARY];
+
+               /* Leaf: */
+               u8                      data[PAGE_SIZE];
+       };
+};
+
+/*
+ * Returns log2 of the size (of data, in bytes) that a tree of a given depth
+ * holds:
+ */
+static inline int genradix_depth_shift(unsigned depth)
+{
+       return PAGE_SHIFT + GENRADIX_ARY_SHIFT * depth;
+}
+
+/*
+ * Returns size (of data, in bytes) that a tree of a given depth holds:
+ *
+ * NOTE(review): the shift is only defined while genradix_depth_shift(depth)
+ * is below BITS_PER_LONG; the callers in this file appear to pass a level
+ * below the root's depth - confirm this holds for the deepest trees.
+ */
+static inline size_t genradix_depth_size(unsigned depth)
+{
+       return 1UL << genradix_depth_shift(depth);
+}
+
+/* depth that's needed for a genradix that can address up to ULONG_MAX: */
+#define GENRADIX_MAX_DEPTH     \
+       DIV_ROUND_UP(BITS_PER_LONG - PAGE_SHIFT, GENRADIX_ARY_SHIFT)
+
+/*
+ * Mask for the low bits of the root pointer, which hold the tree depth: one
+ * less than the smallest power of two above GENRADIX_MAX_DEPTH.
+ */
+#define GENRADIX_DEPTH_MASK                            \
+       ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1))
+
+/*
+ * The root pointer carries the tree depth in its low bits (those covered by
+ * GENRADIX_DEPTH_MASK): extract the depth.
+ *
+ * Only used within this file, so it is static rather than a global symbol
+ * without a prototype.
+ */
+static inline unsigned genradix_root_to_depth(struct genradix_root *r)
+{
+       return (unsigned long) r & GENRADIX_DEPTH_MASK;
+}
+
+/*
+ * Strip the depth bits (GENRADIX_DEPTH_MASK) from the root pointer, leaving
+ * the address of the top level node - NULL for an empty tree.
+ *
+ * Only used within this file, so it is static rather than a global symbol
+ * without a prototype.
+ */
+static inline struct genradix_node *genradix_root_to_node(struct genradix_root *r)
+{
+       return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK);
+}
+
+/*
+ * Look up the byte at @offset within @radix.
+ *
+ * Returns a pointer to it, or NULL if the tree is not deep enough to cover
+ * @offset or a node on the way down has not been allocated.
+ */
+void *__genradix_ptr(struct __genradix *radix, size_t offset)
+{
+       struct genradix_root *root      = READ_ONCE(radix->root);
+       struct genradix_node *node      = genradix_root_to_node(root);
+       unsigned depth                  = genradix_root_to_depth(root);
+
+       /* @offset lies beyond what a tree of this depth can address: */
+       if (ilog2(offset) >= genradix_depth_shift(depth))
+               return NULL;
+
+       /* Descend one level at a time until we reach the leaf: */
+       while (node && depth) {
+               depth--;
+
+               node = node->children[offset >> genradix_depth_shift(depth)];
+               offset &= genradix_depth_size(depth) - 1;
+       }
+
+       if (!node)
+               return NULL;
+
+       return &node->data[offset];
+}
+EXPORT_SYMBOL(__genradix_ptr);
+
+/*
+ * Returns pointer to the specified byte @offset within @radix, allocating it if
+ * necessary - newly allocated slots are always zeroed out:
+ *
+ * Nodes are installed with cmpxchg_release(), no lock is taken here. Returns
+ * NULL if a page could not be allocated; nodes that were already installed
+ * stay in the tree.
+ */
+void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
+                          gfp_t gfp_mask)
+{
+       struct genradix_root *v = READ_ONCE(radix->root);
+       struct genradix_node *n, *new_node = NULL;
+       unsigned level;
+
+       /* Increase tree depth if necessary: */
+       while (1) {
+               struct genradix_root *r = v, *new_root;
+
+               n       = genradix_root_to_node(r);
+               level   = genradix_root_to_depth(r);
+
+               if (n && ilog2(offset) < genradix_depth_shift(level))
+                       break;
+
+               /* Reuse the page from a failed cmpxchg, if we have one: */
+               if (!new_node) {
+                       new_node = (void *)
+                               __get_free_page(gfp_mask|__GFP_ZERO);
+                       if (!new_node)
+                               return NULL;
+               }
+
+               /*
+                * The old root becomes child 0 of the new root; the depth is
+                * stored in the low bits of the root pointer. An empty tree
+                * gets a root of depth 0, i.e. a single leaf:
+                */
+               new_node->children[0] = n;
+               new_root = ((struct genradix_root *)
+                           ((unsigned long) new_node | (n ? level + 1 : 0)));
+
+               /*
+                * If the root changed under us, v is the current root and we
+                * retry with it, keeping new_node for the next attempt:
+                */
+               if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) {
+                       v = new_root;
+                       new_node = NULL;
+               }
+       }
+
+       /* Walk down to the leaf, filling in missing nodes: */
+       while (level--) {
+               struct genradix_node **p =
+                       &n->children[offset >> genradix_depth_shift(level)];
+               offset &= genradix_depth_size(level) - 1;
+
+               n = READ_ONCE(*p);
+               if (!n) {
+                       if (!new_node) {
+                               new_node = (void *)
+                                       __get_free_page(gfp_mask|__GFP_ZERO);
+                               if (!new_node)
+                                       return NULL;
+                       }
+
+                       /*
+                        * cmpxchg returning NULL means new_node was installed:
+                        * the swap makes n point to it and clears new_node.
+                        * Otherwise n is the node that was already there and
+                        * new_node is kept as a spare:
+                        */
+                       if (!(n = cmpxchg_release(p, NULL, new_node)))
+                               swap(n, new_node);
+               }
+       }
+
+       /* A spare page that ended up unused: */
+       if (new_node)
+               free_page((unsigned long) new_node);
+
+       return &n->data[offset];
+}
+EXPORT_SYMBOL(__genradix_ptr_alloc);
+
+/*
+ * Returns a pointer to the data at @iter's position, first moving @iter
+ * forward past any parts of the tree that have no node allocated. Returns NULL
+ * if the tree is empty or @iter has moved beyond what the tree can address.
+ *
+ * @objs_per_page is used to recompute iter->pos whenever iter->offset is moved
+ * to a new page.
+ */
+void *__genradix_iter_peek(struct genradix_iter *iter,
+                          struct __genradix *radix,
+                          size_t objs_per_page)
+{
+       struct genradix_root *r;
+       struct genradix_node *n;
+       unsigned level, i;
+restart:
+       r = READ_ONCE(radix->root);
+       if (!r)
+               return NULL;
+
+       n       = genradix_root_to_node(r);
+       level   = genradix_root_to_depth(r);
+
+       if (ilog2(iter->offset) >= genradix_depth_shift(level))
+               return NULL;
+
+       while (level) {
+               level--;
+
+               /* Index of the child covering iter->offset at this level: */
+               i = (iter->offset >> genradix_depth_shift(level)) &
+                       (GENRADIX_ARY - 1);
+
+               /*
+                * Skip empty slots: move iter->offset to the start of the next
+                * slot at this level and recompute iter->pos from the page
+                * number:
+                */
+               while (!n->children[i]) {
+                       i++;
+                       iter->offset = round_down(iter->offset +
+                                          genradix_depth_size(level),
+                                          genradix_depth_size(level));
+                       iter->pos = (iter->offset >> PAGE_SHIFT) *
+                               objs_per_page;
+                       /* Ran off the end of this node, walk down again: */
+                       if (i == GENRADIX_ARY)
+                               goto restart;
+               }
+
+               n = n->children[i];
+       }
+
+       return &n->data[iter->offset & (PAGE_SIZE - 1)];
+}
+EXPORT_SYMBOL(__genradix_iter_peek);
+
+/*
+ * Free the subtree rooted at @n, where @level is the number of levels below
+ * @n: children are freed first, then the page of @n itself.
+ */
+static void genradix_free_recurse(struct genradix_node *n, unsigned level)
+{
+       unsigned slot = 0;
+
+       /* Only interior nodes (level > 0) hold child pointers: */
+       while (level && slot < GENRADIX_ARY) {
+               struct genradix_node *child = n->children[slot++];
+
+               if (child)
+                       genradix_free_recurse(child, level - 1);
+       }
+
+       free_page((unsigned long) n);
+}
+
+/*
+ * Allocate the pages backing @radix at every PAGE_SIZE step from byte offset
+ * 0 up to (but not including) @size.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails - pages allocated up to
+ * that point are left in the tree.
+ */
+int __genradix_prealloc(struct __genradix *radix, size_t size,
+                       gfp_t gfp_mask)
+{
+       size_t pos = 0;
+
+       while (pos < size) {
+               void *p = __genradix_ptr_alloc(radix, pos, gfp_mask);
+
+               if (!p)
+                       return -ENOMEM;
+
+               pos += PAGE_SIZE;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(__genradix_prealloc);
+
+/*
+ * Detach the whole tree from @radix with xchg(), leaving @radix empty, then
+ * free every node of the detached tree.
+ */
+void __genradix_free(struct __genradix *radix)
+{
+       struct genradix_root *old = xchg(&radix->root, NULL);
+       struct genradix_node *top = genradix_root_to_node(old);
+       unsigned depth            = genradix_root_to_depth(old);
+
+       genradix_free_recurse(top, depth);
+}
+EXPORT_SYMBOL(__genradix_free);