lib/sort.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * A fast, small, non-recursive O(nlog n) sort for the Linux kernel
   4  *
   5  * Jan 23 2005  Matt Mackall <mpm@selenic.com>
   6  */
   7
   8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   9
  10 #include <linux/types.h>
  11 #include <linux/export.h>
  12 #include <linux/sort.h>
  13
  14 /**
  15  * is_aligned - is this pointer & size okay for word-wide copying?
  16  * @base: pointer to data
  17  * @size: size of each element
  18  * @align: required aignment (typically 4 or 8)
  19  *
  20  * Returns true if elements can be copied using word loads and stores.
  21  * The size must be a multiple of the alignment, and the base address must
  22  * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
  23  *
  24  * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
  25  * to "if ((a | b) & mask)", so we do that by hand.
  26  */
  27 __attribute_const__ __always_inline
  28 static bool is_aligned(const void *base, size_t size, unsigned char align)
  29 {
  30         unsigned char lsbits = (unsigned char)size;
  31
  32         (void)base;
  33 #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
  34         lsbits |= (unsigned char)(uintptr_t)base;
  35 #endif
  36         return (lsbits & (align - 1)) == 0;
  37 }
  38
  39 /**
  40  * swap_words_32 - swap two elements in 32-bit chunks
  41  * @a, @b: pointers to the elements
  42  * @size: element size (must be a multiple of 4)
  43  *
  44  * Exchange the two objects in memory.  This exploits base+index addressing,
  45  * which basically all CPUs have, to minimize loop overhead computations.
  46  *
  47  * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
  48  * bottom of the loop, even though the zero flag is stil valid from the
  49  * subtract (since the intervening mov instructions don't alter the flags).
  50  * Gcc 8.1.0 doesn't have that problem.
  51  */
  52 static void swap_words_32(void *a, void *b, int size)
  53 {
  54         size_t n = (unsigned int)size;
  55
  56         do {
  57                 u32 t = *(u32 *)(a + (n -= 4));
  58                 *(u32 *)(a + n) = *(u32 *)(b + n);
  59                 *(u32 *)(b + n) = t;
  60         } while (n);
  61 }
  62
  63 /**
  64  * swap_words_64 - swap two elements in 64-bit chunks
  65  * @a, @b: pointers to the elements
  66  * @size: element size (must be a multiple of 8)
  67  *
  68  * Exchange the two objects in memory.  This exploits base+index
  69  * addressing, which basically all CPUs have, to minimize loop overhead
  70  * computations.
  71  *
  72  * We'd like to use 64-bit loads if possible.  If they're not, emulating
  73  * one requires base+index+4 addressing which x86 has but most other
  74  * processors do not.  If CONFIG_64BIT, we definitely have 64-bit loads,
  75  * but it's possible to have 64-bit loads without 64-bit pointers (e.g.
  76  * x32 ABI).  Are there any cases the kernel needs to worry about?
  77  */
  78 static void swap_words_64(void *a, void *b, int size)
  79 {
  80         size_t n = (unsigned int)size;
  81
  82         do {
  83 #ifdef CONFIG_64BIT
  84                 u64 t = *(u64 *)(a + (n -= 8));
  85                 *(u64 *)(a + n) = *(u64 *)(b + n);
  86                 *(u64 *)(b + n) = t;
  87 #else
  88                 /* Use two 32-bit transfers to avoid base+index+4 addressing */
  89                 u32 t = *(u32 *)(a + (n -= 4));
  90                 *(u32 *)(a + n) = *(u32 *)(b + n);
  91                 *(u32 *)(b + n) = t;
  92
  93                 t = *(u32 *)(a + (n -= 4));
  94                 *(u32 *)(a + n) = *(u32 *)(b + n);
  95                 *(u32 *)(b + n) = t;
  96 #endif
  97         } while (n);
  98 }
  99
 100 /**
 101  * swap_bytes - swap two elements a byte at a time
 102  * @a, @b: pointers to the elements
 103  * @size: element size
 104  *
 105  * This is the fallback if alignment doesn't allow using larger chunks.
 106  */
 107 static void swap_bytes(void *a, void *b, int size)
 108 {
 109         size_t n = (unsigned int)size;
 110
 111         do {
 112                 char t = ((char *)a)[--n];
 113                 ((char *)a)[n] = ((char *)b)[n];
 114                 ((char *)b)[n] = t;
 115         } while (n);
 116 }
 117
 118 /**
 119  * sort - sort an array of elements
 120  * @base: pointer to data to sort
 121  * @num: number of elements
 122  * @size: size of each element
 123  * @cmp_func: pointer to comparison function
 124  * @swap_func: pointer to swap function or NULL
 125  *
 126  * This function does a heapsort on the given array.  You may provide
 127  * a swap_func function if you need to do something more than a memory
 128  * copy (e.g. fix up pointers or auxiliary data), but the built-in swap
 129  * isn't usually a bottleneck.
 130  *
 131  * Sorting time is O(n log n) both on average and worst-case. While
 132  * qsort is about 20% faster on average, it suffers from exploitable
 133  * O(n*n) worst-case behavior and extra memory requirements that make
 134  * it less suitable for kernel use.
 135  */
 136
 137 void sort(void *base, size_t num, size_t size,
 138           int (*cmp_func)(const void *, const void *),
 139           void (*swap_func)(void *, void *, int size))
 140 {
 141         /* pre-scale counters for performance */
 142         int i = (num/2 - 1) * size, n = num * size, c, r;
 143
 144         if (!swap_func) {
 145                 if (is_aligned(base, size, 8))
 146                         swap_func = swap_words_64;
 147                 else if (is_aligned(base, size, 4))
 148                         swap_func = swap_words_32;
 149                 else
 150                         swap_func = swap_bytes;
 151         }
 152
 153         /* heapify */
 154         for ( ; i >= 0; i -= size) {
 155                 for (r = i; r * 2 + size < n; r  = c) {
 156                         c = r * 2 + size;
 157                         if (c < n - size &&
 158                                         cmp_func(base + c, base + c + size) < 0)
 159                                 c += size;
 160                         if (cmp_func(base + r, base + c) >= 0)
 161                                 break;
 162                         swap_func(base + r, base + c, size);
 163                 }
 164         }
 165
 166         /* sort */
 167         for (i = n - size; i > 0; i -= size) {
 168                 swap_func(base, base + i, size);
 169                 for (r = 0; r * 2 + size < i; r = c) {
 170                         c = r * 2 + size;
 171                         if (c < i - size &&
 172                                         cmp_func(base + c, base + c + size) < 0)
 173                                 c += size;
 174                         if (cmp_func(base + r, base + c) >= 0)
 175                                 break;
 176                         swap_func(base + r, base + c, size);
 177                 }
 178         }
 179 }
 180
 181 EXPORT_SYMBOL(sort);