mm: vmalloc: remove a global vmap_blocks xarray

author Uladzislau Rezki (Sony) <urezki@gmail.com>

Thu, 30 Mar 2023 19:06:38 +0000 (21:06 +0200)

committer Andrew Morton <akpm@linux-foundation.org>

Tue, 18 Apr 2023 23:29:47 +0000 (16:29 -0700)
author Uladzislau Rezki (Sony) <urezki@gmail.com>
Thu, 30 Mar 2023 19:06:38 +0000 (21:06 +0200)
committer Andrew Morton <akpm@linux-foundation.org>
Tue, 18 Apr 2023 23:29:47 +0000 (16:29 -0700)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c

index 63a2255..add3328 100644 (file)
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1915,6 +1915,13 @@ static struct vmap_area *find_unlink_vmap_area(unsigned long addr)
  struct vmap_block_queue {
         spinlock_t lock;
         struct list_head free;
+
+       /*
+        * vmap_blocks of this hash slot, keyed by addr_to_vb_idx().
+        * An xarray allocates extra memory dynamically; if that
+        * becomes an issue, an rb-tree can be used instead.
+        */
+       struct xarray vmap_blocks;
  };
  
  struct vmap_block {
@@ -1932,11 +1939,48 @@ struct vmap_block {
  static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
  
  /*
- * XArray of vmap blocks, indexed by address, to quickly find a vmap block
- * in the free path. Could get rid of this if we change the API to return a
- * "cookie" from alloc, to be passed to free. But no big deal yet.
+ * To quickly find the "vmap_block" associated with a
+ * specific address, we use a hash.
+ *
+ * The per-cpu vmap_block_queue serves two purposes: it serializes
+ * access to the free block chains among CPUs (alloc path), and it
+ * also acts as a vmap_block hash (alloc/free paths). In other words
+ * it is overloaded, since the per-cpu array already exists and can
+ * double as a hash table. When used as a hash, the 'cpu' passed to
+ * per_cpu() is not actually a CPU but rather a hash index.
+ *
+ * The hash function is addr_to_vb_xarray(), which maps any address
+ * to the index (in the hash) it belongs to. That index is then passed
+ * to the per_cpu() macro to access the corresponding array entry.
+ *
+ * An example:
+ *
+ *  CPU_1  CPU_2  CPU_0
+ *    |      |      |
+ *    V      V      V
+ * 0     10     20     30     40     50     60
+ * |------|------|------|------|------|------|...<vmap address space>
+ *   CPU0   CPU1   CPU2   CPU0   CPU1   CPU2
+ *
+ * - CPU_1 invokes vm_unmap_ram(6), 6 belongs to CPU0 zone, thus
+ *   it accesses: CPU0/INDEX0 -> vmap_blocks -> xa_lock;
+ *
+ * - CPU_2 invokes vm_unmap_ram(11), 11 belongs to CPU1 zone, thus
+ *   it accesses: CPU1/INDEX1 -> vmap_blocks -> xa_lock;
+ *
+ * - CPU_0 invokes vm_unmap_ram(20), 20 belongs to CPU2 zone, thus
+ *   it accesses: CPU2/INDEX2 -> vmap_blocks -> xa_lock.
+ *
+ * This technique almost always avoids lock contention on insert/remove,
+ * however xarray spinlocks protect against any contention that remains.
   */
-static DEFINE_XARRAY(vmap_blocks);
+static struct xarray *addr_to_vb_xarray(unsigned long addr)
+{
+       int index = (addr / VMAP_BLOCK_SIZE) % nr_cpu_ids;
+       /* cpu_possible_mask may be sparse: redirect a hole to the next possible CPU. */
+       index = cpu_possible(index) ? index : cpumask_next(index, cpu_possible_mask);
+       return &per_cpu(vmap_block_queue, index).vmap_blocks;
+}
  
  /*
   * We should probably have a fallback mechanism to allocate virtual memory
@@ -1974,6 +2018,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
         struct vmap_block_queue *vbq;
         struct vmap_block *vb;
         struct vmap_area *va;
+       struct xarray *xa;
         unsigned long vb_idx;
         int node, err;
         void *vaddr;
@@ -2007,8 +2052,9 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
         bitmap_set(vb->used_map, 0, (1UL << order));
         INIT_LIST_HEAD(&vb->free_list);
  
+       xa = addr_to_vb_xarray(va->va_start);
         vb_idx = addr_to_vb_idx(va->va_start);
-       err = xa_insert(&vmap_blocks, vb_idx, vb, gfp_mask);
+       err = xa_insert(xa, vb_idx, vb, gfp_mask);
         if (err) {
                 kfree(vb);
                 free_vmap_area(va);
@@ -2026,8 +2072,10 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
  static void free_vmap_block(struct vmap_block *vb)
  {
         struct vmap_block *tmp;
+       struct xarray *xa;
  
-       tmp = xa_erase(&vmap_blocks, addr_to_vb_idx(vb->va->va_start));
+       xa = addr_to_vb_xarray(vb->va->va_start);
+       tmp = xa_erase(xa, addr_to_vb_idx(vb->va->va_start));
         BUG_ON(tmp != vb);
  
         spin_lock(&vmap_area_lock);
@@ -2139,6 +2187,7 @@ static void vb_free(unsigned long addr, unsigned long size)
         unsigned long offset;
         unsigned int order;
         struct vmap_block *vb;
+       struct xarray *xa;
  
         BUG_ON(offset_in_page(size));
         BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
@@ -2147,7 +2196,10 @@ static void vb_free(unsigned long addr, unsigned long size)
  
         order = get_order(size);
         offset = (addr & (VMAP_BLOCK_SIZE - 1)) >> PAGE_SHIFT;
-       vb = xa_load(&vmap_blocks, addr_to_vb_idx(addr));
+
+       xa = addr_to_vb_xarray(addr);
+       vb = xa_load(xa, addr_to_vb_idx(addr));
+
         spin_lock(&vb->lock);
         bitmap_clear(vb->used_map, offset, (1UL << order));
         spin_unlock(&vb->lock);
@@ -3525,6 +3577,7 @@ static size_t vmap_ram_vread_iter(struct iov_iter *iter, const char *addr,
  {
         char *start;
         struct vmap_block *vb;
+       struct xarray *xa;
         unsigned long offset;
         unsigned int rs, re;
         size_t remains, n;
@@ -3543,7 +3596,8 @@ static size_t vmap_ram_vread_iter(struct iov_iter *iter, const char *addr,
          * Area is split into regions and tracked with vmap_block, read out
          * each region and zero fill the hole between regions.
          */
-       vb = xa_load(&vmap_blocks, addr_to_vb_idx((unsigned long)addr));
+       xa = addr_to_vb_xarray((unsigned long) addr);
+       vb = xa_load(xa, addr_to_vb_idx((unsigned long)addr));
         if (!vb)
                 goto finished_zero;
  
@@ -4337,6 +4391,7 @@ void __init vmalloc_init(void)
                 p = &per_cpu(vfree_deferred, i);
                 init_llist_head(&p->list);
                 INIT_WORK(&p->wq, delayed_vfree_work);
+               xa_init(&vbq->vmap_blocks);
         }
  
         /* Import existing vmlist entries. */
author	Uladzislau Rezki (Sony) <urezki@gmail.com>
	Thu, 30 Mar 2023 19:06:38 +0000 (21:06 +0200)
committer	Andrew Morton <akpm@linux-foundation.org>
	Tue, 18 Apr 2023 23:29:47 +0000 (16:29 -0700)